From 92f43c452c5313a7914eab2b08d966a6c5007baa Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2015 00:05:14 +0200 Subject: [PATCH 001/734] kbuild/mkspec: Simplify vmlinux.bz2 creation No need for the intermediary vmlinux.orig - bzip2 can keep the original files used for compression with --keep. Signed-off-by: Borislav Petkov Signed-off-by: Michal Marek --- scripts/package/mkspec | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/package/mkspec b/scripts/package/mkspec index d9ab94b17de0bc..89f9669d4f0070 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -111,10 +111,8 @@ echo 'cp System.map $RPM_BUILD_ROOT'"/boot/System.map-$KERNELRELEASE" echo 'cp .config $RPM_BUILD_ROOT'"/boot/config-$KERNELRELEASE" echo "%ifnarch ppc64" -echo 'cp vmlinux vmlinux.orig' -echo 'bzip2 -9 vmlinux' +echo 'bzip2 -9 --keep vmlinux' echo 'mv vmlinux.bz2 $RPM_BUILD_ROOT'"/boot/vmlinux-$KERNELRELEASE.bz2" -echo 'mv vmlinux.orig vmlinux' echo "%endif" if ! $PREBUILT; then From dca0c0246fb739bccdd19ff2bfd0f02ccffdb07c Mon Sep 17 00:00:00 2001 From: Riku Voipio Date: Thu, 16 Apr 2015 16:42:46 +0300 Subject: [PATCH 002/734] deb-pkg: move setting debarch to a separate function The create_package() function tries to resolve the architecture in use for every package. Split setting the architecture into a new function, set_debarch(), called once on startup. This allows using debarch from other parts of the script as needed. v2: Follow Michal's suggestion of setting variables at top scope and also setting the fallback $debarch in the new function Signed-off-by: Riku Voipio Signed-off-by: Michal Marek --- scripts/package/builddeb | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 88dbf23b697082..fccabe5fb72bdc 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -25,8 +25,13 @@ create_package() { chown -R root:root "$pdir" chmod -R go-w "$pdir" + # Create the package + dpkg-gencontrol $forcearch -Vkernel:debarch="${debarch}" -p$pname -P"$pdir" + dpkg --build "$pdir" .. +} + +set_debarch() { # Attempt to find the correct Debian architecture - local forcearch="" debarch="" case "$UTS_MACHINE" in i386|ia64|alpha) debarch="$UTS_MACHINE" ;; @@ -47,6 +52,7 @@ arm*) debarch=arm$(grep -q CONFIG_AEABI=y $KCONFIG_CONFIG && echo el || true) ;; *) + debarch=$(dpkg --print-architecture) echo "" >&2 echo "** ** ** WARNING ** ** **" >&2 echo "" >&2 @@ -59,13 +65,8 @@ if [ -n "$KBUILD_DEBARCH" ] ; then debarch="$KBUILD_DEBARCH" fi - if [ -n "$debarch" ] ; then - forcearch="-DArchitecture=$debarch" - fi + forcearch="-DArchitecture=$debarch" - # Create the package - dpkg-gencontrol $forcearch -Vkernel:debarch="${debarch:-$(dpkg --print-architecture)}" -p$pname -P"$pdir" - dpkg --build "$pdir" ..
} # Some variables and settings used throughout the script @@ -86,6 +87,9 @@ fwpackagename=linux-firmware-image-$version kernel_headers_packagename=linux-headers-$version libc_headers_packagename=linux-libc-dev dbg_packagename=$packagename-dbg +debarch= +forcearch= +set_debarch if [ "$ARCH" = "um" ] ; then packagename=user-mode-linux-$version From 64178cb62c329350fe06622cd215264d849b27b1 Mon Sep 17 00:00:00 2001 From: Andrey Skvortsov Date: Mon, 16 Mar 2015 11:20:54 +0300 Subject: [PATCH 003/734] builddeb: fix stripped module signatures if CONFIG_DEBUG_INFO and CONFIG_MODULE_SIG_ALL are set If CONFIG_MODULE_SIG_ALL is set, then the user expects that all modules are automatically signed in the resulting package, as is the case for rpm-pkg, binrpm-pkg, tar, and tar-*. For deb-pkg this is correct only if CONFIG_DEBUG_INFO is NOT set. In that case the deb package contains signed modules. But if CONFIG_DEBUG_INFO is set, builddeb creates a separate package with debug information. To do that, debug information from all modules is copied into separate files by objcopy, and the loadable kernel modules are stripped afterwards. Stripping removes the signatures previously added (during modules_install) from the loadable kernel modules. Therefore the final deb package contains unsigned modules despite CONFIG_MODULE_SIG_ALL being set. To solve this, re-sign all stripped modules if CONFIG_MODULE_SIG_ALL is set. Signed-off-by: Andrey Skvortsov Acked-by: maximilian attems Signed-off-by: Michal Marek --- scripts/package/builddeb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index fccabe5fb72bdc..222770c1b77510 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -166,6 +166,12 @@ if grep -q '^CONFIG_MODULES=y' $KCONFIG_CONFIG ; then # then add a link to those $OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $tmpdir/$module done + + # resign stripped modules + MODULE_SIG_ALL="$(grep -s '^CONFIG_MODULE_SIG_ALL=y' $KCONFIG_CONFIG || true)" + if [ -n "$MODULE_SIG_ALL" ]; then + INSTALL_MOD_PATH="$tmpdir" $MAKE KBUILD_SRC= modules_sign + fi fi fi From ca2a9d2cf6cf3dd852c3926ac7e30ee774da4638 Mon Sep 17 00:00:00 2001 From: "Arnaud Patard (Rtp)" Date: Tue, 3 Feb 2015 13:16:33 +0100 Subject: [PATCH 004/734] deb-pkg: Add device tree blobs to the package When building a package with make deb-pkg (say, for arm), the dtb files are not added to the package. Given that things are still evolving on arm, it makes sense to have them along with the kernel and modules.
Signed-off-by: Arnaud Patard Reviewed-by: Ben Hutchings Acked-by: maximilian attems Signed-off-by: Michal Marek --- scripts/package/builddeb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 222770c1b77510..d30116b57e7e48 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -147,6 +147,13 @@ else cp arch/$ARCH/boot/$KBUILD_IMAGE "$tmpdir/$installed_image_path" fi +if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then + # Only some architectures with OF support have this target + if grep -q dtbs_install "${srctree}/arch/$SRCARCH/Makefile"; then + $MAKE KBUILD_SRC= INSTALL_DTBS_PATH="$tmpdir/usr/lib/$packagename" dtbs_install + fi +fi + if grep -q '^CONFIG_MODULES=y' $KCONFIG_CONFIG ; then INSTALL_MOD_PATH="$tmpdir" $MAKE KBUILD_SRC= modules_install rm -f "$tmpdir/lib/modules/$version/build" From f9beafc9d8bf7febf673df9b41e13596ca669f75 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 9 May 2015 17:09:27 -0300 Subject: [PATCH 005/734] coccinelle: pm_runtime: Insert blank line Insert a blank line in order to improve the readability of the generated patch and also make it consistent with the other .cocci files. Signed-off-by: Fabio Estevam Acked-by: Julia Lawall Signed-off-by: Michal Marek --- scripts/coccinelle/api/pm_runtime.cocci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/coccinelle/api/pm_runtime.cocci b/scripts/coccinelle/api/pm_runtime.cocci index f01789e967ec4b..b7042d074078cb 100644 --- a/scripts/coccinelle/api/pm_runtime.cocci +++ b/scripts/coccinelle/api/pm_runtime.cocci @@ -1,5 +1,5 @@ /// Make sure pm_runtime_* calls does not use unnecessary IS_ERR_VALUE -// +/// // Keywords: pm_runtime // Confidence: Medium // Copyright (C) 2013 Texas Instruments Incorporated - GPLv2. From fe8c46b632505a880c527bc9ae246e868aa3ece5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 9 May 2015 17:09:28 -0300 Subject: [PATCH 006/734] coccinelle: returnvar: Use imperative mood According to Documentation/SubmittingPatches: "Describe your changes in imperative mood, e.g. "make xyzzy do frotz" instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy to do frotz", as if you are giving orders to the codebase to change its behaviour." So do as recommended. Signed-off-by: Fabio Estevam Acked-by: Julia Lawall Signed-off-by: Michal Marek --- scripts/coccinelle/misc/returnvar.cocci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/coccinelle/misc/returnvar.cocci b/scripts/coccinelle/misc/returnvar.cocci index 605955a91c4493..d8286ef5307fca 100644 --- a/scripts/coccinelle/misc/returnvar.cocci +++ b/scripts/coccinelle/misc/returnvar.cocci @@ -1,5 +1,5 @@ /// -/// Removes unneeded variable used to store return value. +/// Remove unneeded variable used to store return value. /// // Confidence: Moderate // Copyright: (C) 2012 Peter Senna Tschudin, INRIA/LIP6. GPLv2. From dd494ac0de48ded6a7ec0525f253116fde5c7be5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 9 May 2015 17:09:29 -0300 Subject: [PATCH 007/734] coccinelle: ifaddr: Fix the sentence Make the sentence sensible. 
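As a minimal C sketch of the pattern the ifaddr.cocci rule below is concerned with (the struct and field names here are hypothetical, not taken from any patch in this series): the address of a variable or field is never NULL, so such a test is always true and is almost certainly a typo for a test of the value itself.

/* Hypothetical example of the always-true address test that ifaddr.cocci flags. */
struct device_stats {
	int errors;
};

static int stats_present(struct device_stats *stats)
{
	if (&stats->errors)	/* always true; likely meant stats->errors != 0 */
		return 1;
	return 0;
}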
Signed-off-by: Fabio Estevam Acked-by: Julia Lawall Signed-off-by: Michal Marek --- scripts/coccinelle/misc/ifaddr.cocci | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/coccinelle/misc/ifaddr.cocci b/scripts/coccinelle/misc/ifaddr.cocci index 8aebd1875e7526..c2663c677ac1c9 100644 --- a/scripts/coccinelle/misc/ifaddr.cocci +++ b/scripts/coccinelle/misc/ifaddr.cocci @@ -1,5 +1,4 @@ -/// the address of a variable or field is non-zero is likely always to bo -/// non-zero +/// The address of a variable or field is likely always to be non-zero. /// // Confidence: High // Copyright: (C) 2012 Julia Lawall, INRIA/LIP6. GPLv2. From ca34cba43168830dd96f8f6407282131733e6fb4 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 9 May 2015 17:09:30 -0300 Subject: [PATCH 008/734] coccinelle: simple_open: Use imperative mood According to Documentation/SubmittingPatches: "Describe your changes in imperative mood, e.g. "make xyzzy do frotz" instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy to do frotz", as if you are giving orders to the codebase to change its behaviour." So do as recommended. Signed-off-by: Fabio Estevam Acked-by: Julia Lawall Signed-off-by: Michal Marek --- scripts/coccinelle/api/simple_open.cocci | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/coccinelle/api/simple_open.cocci b/scripts/coccinelle/api/simple_open.cocci index b67e174f3d95ef..bd1a2a4ee106d0 100644 --- a/scripts/coccinelle/api/simple_open.cocci +++ b/scripts/coccinelle/api/simple_open.cocci @@ -1,5 +1,5 @@ -/// This removes an open coded simple_open() function -/// and replaces file operations references to the function +/// Remove an open coded simple_open() function +/// and replace file operations references to the function /// with simple_open() instead. /// // Confidence: High From 4341f6e5ce448dd79c3e663513213b936ba34c83 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 20 May 2015 08:02:34 -0300 Subject: [PATCH 009/734] scripts/coccinelle/misc/semicolon.cocci: Use imperative mood According to Documentation/SubmittingPatches: "Describe your changes in imperative mood, e.g. "make xyzzy do frotz" instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy to do frotz", as if you are giving orders to the codebase to change its behaviour." So do as recommended. Signed-off-by: Fabio Estevam Acked-by: Julia Lawall Signed-off-by: Michal Marek --- scripts/coccinelle/misc/semicolon.cocci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/coccinelle/misc/semicolon.cocci b/scripts/coccinelle/misc/semicolon.cocci index a47eba2edc9e1e..6740c659a2b384 100644 --- a/scripts/coccinelle/misc/semicolon.cocci +++ b/scripts/coccinelle/misc/semicolon.cocci @@ -1,5 +1,5 @@ /// -/// Removes unneeded semicolon. +/// Remove unneeded semicolon. /// // Confidence: Moderate // Copyright: (C) 2012 Peter Senna Tschudin, INRIA/LIP6. GPLv2. From 74de120d8096f72bdf95aba7234428c798d931cd Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 20 May 2015 08:02:35 -0300 Subject: [PATCH 010/734] scripts/coccinelle/misc/irqf_oneshot.cocci: Fix grammar Correct form is 'always requested'.
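A short sketch of the boilerplate removed by the simple_open rule touched in PATCH 008 above (the driver and function names here are hypothetical): simple_open() in fs/libfs.c carries essentially this body, so the open-coded helper can be dropped and the file_operations reference pointed at simple_open() directly.

#include <linux/fs.h>

/* Hypothetical open-coded helper of the kind simple_open.cocci detects. */
static int my_debugfs_open(struct inode *inode, struct file *file)
{
	file->private_data = inode->i_private;
	return 0;
}

static const struct file_operations my_debugfs_fops = {
	.open = my_debugfs_open,	/* after the conversion: .open = simple_open */
};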
Signed-off-by: Fabio Estevam Acked-by: Julia Lawall Signed-off-by: Michal Marek --- scripts/coccinelle/misc/irqf_oneshot.cocci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/coccinelle/misc/irqf_oneshot.cocci b/scripts/coccinelle/misc/irqf_oneshot.cocci index a24a754ae1d728..b17ac8b998947c 100644 --- a/scripts/coccinelle/misc/irqf_oneshot.cocci +++ b/scripts/coccinelle/misc/irqf_oneshot.cocci @@ -1,4 +1,4 @@ -/// Make sure threaded IRQs without a primary handler are always request with +/// Make sure threaded IRQs without a primary handler are always requested with /// IRQF_ONESHOT /// // From 4c8f20bb8e0ba6eecf62958bbf0502a2dc445ce6 Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Thu, 21 May 2015 19:19:13 +0800 Subject: [PATCH 011/734] coccinelle: api: add vma_pages.cocci This semantic patch replaces explicit computations of the vma page count with an explicit function call. Signed-off-by: Dmitry Kalinkin Acked-by: Julia Lawall Signed-off-by: Michal Marek --- scripts/coccinelle/api/vma_pages.cocci | 60 ++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 scripts/coccinelle/api/vma_pages.cocci diff --git a/scripts/coccinelle/api/vma_pages.cocci b/scripts/coccinelle/api/vma_pages.cocci new file mode 100644 index 00000000000000..3e52e11ea1dc8a --- /dev/null +++ b/scripts/coccinelle/api/vma_pages.cocci @@ -0,0 +1,60 @@ +/// +/// Use vma_pages function on vma object instead of explicit computation. +/// +// Confidence: High +// Keywords: vma_pages vma +// Comment: Based on resource_size.cocci + +virtual context +virtual patch +virtual org +virtual report + +//---------------------------------------------------------- +// For context mode +//---------------------------------------------------------- + +@r_context depends on context && !patch && !org && !report@ +struct vm_area_struct *vma; +@@ + +* (vma->vm_end - vma->vm_start) >> PAGE_SHIFT + +//---------------------------------------------------------- +// For patch mode +//---------------------------------------------------------- + +@r_patch depends on !context && patch && !org && !report@ +struct vm_area_struct *vma; +@@ + +- ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) ++ vma_pages(vma) + +//---------------------------------------------------------- +// For org mode +//---------------------------------------------------------- + +@r_org depends on !context && !patch && (org || report)@ +struct vm_area_struct *vma; +position p; +@@ + + (vma->vm_end@p - vma->vm_start) >> PAGE_SHIFT + +@script:python depends on report@ +p << r_org.p; +x << r_org.vma; +@@ + +msg="WARNING: Consider using vma_pages helper on %s" % (x) +coccilib.report.print_report(p[0], msg) + +@script:python depends on org@ +p << r_org.p; +x << r_org.vma; +@@ + +msg="WARNING: Consider using vma_pages helper on %s" % (x) +msg_safe=msg.replace("[","@(").replace("]",")") +coccilib.org.print_todo(p[0], msg_safe) From 9473a62f779d78bae646e7ef1a792d53ad4ac29e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 24 May 2015 17:45:54 -0300 Subject: [PATCH 012/734] coccinelle: irqf_oneshot.cocci: Improve the generated commit log Improve the commit log of the generated patch by mentioning the commit that requires threaded IRQs without a primary handler to be requested with the IRQF_ONESHOT flag.
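The requirement in question comes from commit 1c6c69525b40 ("genirq: Reject bogus threaded irq requests"), cited in the hunk below: a threaded IRQ with no primary handler must pass IRQF_ONESHOT or the request fails with -EINVAL. A minimal sketch of a correct request (the device name and handler are hypothetical):

#include <linux/interrupt.h>

static irqreturn_t my_thread_fn(int irq, void *dev_id)
{
	/* handle the interrupt in process context */
	return IRQ_HANDLED;
}

static int my_request_irq(unsigned int irq, void *dev)
{
	/* NULL primary handler: IRQF_ONESHOT is mandatory here */
	return request_threaded_irq(irq, NULL, my_thread_fn,
				    IRQF_ONESHOT, "my-device", dev);
}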
Signed-off-by: Fabio Estevam Acked-by: Valentin Rothberg Signed-off-by: Michal Marek --- scripts/coccinelle/misc/irqf_oneshot.cocci | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/coccinelle/misc/irqf_oneshot.cocci b/scripts/coccinelle/misc/irqf_oneshot.cocci index b17ac8b998947c..b421150a2effcd 100644 --- a/scripts/coccinelle/misc/irqf_oneshot.cocci +++ b/scripts/coccinelle/misc/irqf_oneshot.cocci @@ -1,5 +1,8 @@ -/// Make sure threaded IRQs without a primary handler are always requested with -/// IRQF_ONESHOT +/// Since commit 1c6c69525b40 ("genirq: Reject bogus threaded irq requests") +/// threaded IRQs without a primary handler need to be requested with +/// IRQF_ONESHOT, otherwise the request will fail. +/// +/// So pass the IRQF_ONESHOT flag in this case. /// // // Confidence: Good From f94c56f4f33dd34551af6bcc1afde5082fdf6e86 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 1 Jun 2015 22:52:20 -0300 Subject: [PATCH 013/734] coccinelle: simple_return: Add a blank line Insert a blank line in order to improve the readability of the generated patch and also make it consistent with the other .cocci files. Signed-off-by: Fabio Estevam Acked-by: Julia Lawall Signed-off-by: Michal Marek --- scripts/coccinelle/misc/simple_return.cocci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/coccinelle/misc/simple_return.cocci b/scripts/coccinelle/misc/simple_return.cocci index 47f7084b6360a0..e8b6313b116f57 100644 --- a/scripts/coccinelle/misc/simple_return.cocci +++ b/scripts/coccinelle/misc/simple_return.cocci @@ -1,6 +1,6 @@ /// Simplify a trivial if-return sequence. Possibly combine with a /// preceding function call. -// +/// // Confidence: High // Copyright: (C) 2014 Julia Lawall, INRIA/LIP6. GPLv2. // Copyright: (C) 2014 Gilles Muller, INRIA/LiP6. GPLv2. From d0fe116b4554d79125f384f7ba23722b41c3cb93 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 24 Apr 2015 10:27:40 -0700 Subject: [PATCH 014/734] gitignore: Add MIPS vmlinux.32 to the list MIPS64 kernel builds will produce a vmlinux.32 kernel image for compatibility; ignore it. Signed-off-by: Florian Fainelli Signed-off-by: Michal Marek --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4ad4a98b884b9c..34d6bad9317b36 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ Module.symvers /TAGS /linux /vmlinux +/vmlinux.32 /vmlinux-gdb.py /vmlinuz /System.map From a37161c0588c0d3ff4afb08ef83106a80bde604e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Apr 2015 14:02:41 -0700 Subject: [PATCH 015/734] Kbuild: Add ID files to .gitignore I use GNU id-utils to find code (essentially a database-backed grep), which generates an ID file to maintain its data. Add ID to the .gitignore file. Signed-off-by: Andi Kleen Signed-off-by: Michal Marek --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 34d6bad9317b36..98b91fccff45fc 100644 --- a/.gitignore +++ b/.gitignore @@ -90,6 +90,9 @@ GRTAGS GSYMS GTAGS +# id-utils files +ID + *.orig *~ \#*# From 21a59991ce0cd9a0b54b135305e3fcf880f2aaf1 Mon Sep 17 00:00:00 2001 From: Jim Davis Date: Mon, 8 Jun 2015 13:19:08 -0700 Subject: [PATCH 016/734] scripts/package/Makefile: rpmbuild is needed for rpm targets Before rpm release 4.1, in 2002, either the rpm command or the rpmbuild command could be used in the rpm-pkg or binrpm-pkg targets, and the Makefile chose the rpm command if the rpmbuild command wasn't found.
After release 4.1, however, the rpm command could no longer be used in place of the rpmbuild command. As the rpmbuild command is not installed by default, this can lead to failures with the rpm-pkg and binrpm-pkg targets: rpm --define "_builddir ." --target \ x86_64 -bb ./binkernel.spec rpm --target: unknown option scripts/package/Makefile:60: recipe for target 'binrpm-pkg' failed Change the Makefile to use rpmbuild unconditionally to avoid this. Signed-off-by: Jim Davis Signed-off-by: Michal Marek --- scripts/package/Makefile | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/package/Makefile b/scripts/package/Makefile index 99ca6e76eb0a53..8b11d5adec7f58 100644 --- a/scripts/package/Makefile +++ b/scripts/package/Makefile @@ -21,10 +21,6 @@ # Note that the rpm-pkg target cannot be used with KBUILD_OUTPUT, # but the binrpm-pkg target can; for some reason O= gets ignored. -# Do we have rpmbuild, otherwise fall back to the older rpm -RPM := $(shell if [ -x "/usr/bin/rpmbuild" ]; then echo rpmbuild; \ - else echo rpm; fi) - # Remove hyphens since they have special meaning in RPM filenames KERNELPATH := kernel-$(subst -,_,$(KERNELRELEASE)) # Include only those top-level files that are needed by make, plus the GPL copy @@ -51,7 +47,7 @@ rpm-pkg rpm: FORCE rm -f $(objtree)/.scmversion $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version mv -f $(objtree)/.tmp_version $(objtree)/.version - $(RPM) $(RPMOPTS) --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz + rpmbuild --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz rm $(KERNELPATH).tar.gz kernel.spec # binrpm-pkg @@ -62,7 +58,7 @@ binrpm-pkg: FORCE $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version mv -f $(objtree)/.tmp_version $(objtree)/.version - $(RPM) $(RPMOPTS) --define "_builddir $(objtree)" --target \ + rpmbuild --define "_builddir $(objtree)" --target \ $(UTS_MACHINE) -bb $(objtree)/binkernel.spec rm binkernel.spec From 04dc91ce2cca5927159c689aa1f47663f8c51530 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 13 Jul 2015 12:26:44 +0200 Subject: [PATCH 017/734] regmap: Add better support for devices without readback support Currently regmap requires that a reg_read callback is supplied, otherwise a warning is emitted each time regmap_read() is called. This means a device or bus without readback support needs to supply a dummy reg_read callback. Apart from that, regmap_read() will still work fine if a cache is used. Remove the warning and let regmap_readable() return false if no reg_read callback is supplied. This means a device no longer has to supply a dummy callback if it does not support readback, and it also doesn't have to have a readable_reg callback that always returns false since this is now implicit.
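A minimal sketch of what this change enables (the register layout and cache type below are assumptions for illustration, not taken from the patch): a write-only device can register a regmap with no reg_read callback at all; regmap_read() is then satisfied from the register cache, and regmap_readable() implicitly reports false.

#include <linux/regmap.h>

static const struct regmap_config my_writeonly_config = {
	.reg_bits	= 8,
	.val_bits	= 8,
	.max_register	= 0x7f,
	/* reads come from the cache; no reg_read and no readable_reg needed */
	.cache_type	= REGCACHE_RBTREE,
};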
Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 7111d04f26218b..8894b992043e04 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -93,6 +93,9 @@ bool regmap_writeable(struct regmap *map, unsigned int reg) bool regmap_readable(struct regmap *map, unsigned int reg) { + if (!map->reg_read) + return false; + if (map->max_register && reg > map->max_register) return false; @@ -2097,8 +2100,6 @@ static int _regmap_read(struct regmap *map, unsigned int reg, int ret; void *context = _regmap_map_get_context(map); - WARN_ON(!map->reg_read); - if (!map->cache_bypass) { ret = regcache_read(map, reg, val); if (ret == 0) From 671a2781ff01abf4fdc8904881fc3abd3a8279af Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Fri, 10 Jul 2015 17:19:55 -0400 Subject: [PATCH 018/734] security: add ioctl specific auditing to lsm_audit Add information about ioctl calls to the LSM audit data. Log the file path and command number. Signed-off-by: Jeff Vander Stoep Acked-by: Nick Kralevich [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_audit.h | 7 +++++++ security/lsm_audit.c | 15 +++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 1cc89e9df480cc..ffb9c9da4f39f8 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -40,6 +40,11 @@ struct lsm_network_audit { } fam; }; +struct lsm_ioctlop_audit { + struct path path; + u16 cmd; +}; + /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { char type; @@ -53,6 +58,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_KMOD 8 #define LSM_AUDIT_DATA_INODE 9 #define LSM_AUDIT_DATA_DENTRY 10 +#define LSM_AUDIT_DATA_IOCTL_OP 11 union { struct path path; struct dentry *dentry; @@ -68,6 +74,7 @@ struct common_audit_data { } key_struct; #endif char *kmod_name; + struct lsm_ioctlop_audit *op; } u; /* this union contains LSM specific data */ union { diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 1d34277dc402b5..9f6c649c65e92a 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -245,6 +245,21 @@ static void dump_common_audit_data(struct audit_buffer *ab, } break; } + case LSM_AUDIT_DATA_IOCTL_OP: { + struct inode *inode; + + audit_log_d_path(ab, " path=", &a->u.op->path); + + inode = a->u.op->path.dentry->d_inode; + if (inode) { + audit_log_format(ab, " dev="); + audit_log_untrustedstring(ab, inode->i_sb->s_id); + audit_log_format(ab, " ino=%lu", inode->i_ino); + } + + audit_log_format(ab, " ioctlcmd=%hx", a->u.op->cmd); + break; + } case LSM_AUDIT_DATA_DENTRY: { struct inode *inode; From fa1aa143ac4a682c7f5fd52a3cf05f5a6fe44a0a Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Fri, 10 Jul 2015 17:19:56 -0400 Subject: [PATCH 019/734] selinux: extended permissions for ioctls Add extended permissions logic to selinux. Extended permissions provide additional permissions in 256-bit increments. Extend the generic ioctl permission check to use the extended permissions for per-command filtering. Source/target/class sets including the ioctl permission may additionally include a set of commands. Example: allowxperm <source> <target>:<class> ioctl unpriv_app_socket_cmds; auditallowxperm <source> <target>:<class> ioctl priv_gpu_cmds; Where unpriv_app_socket_cmds and priv_gpu_cmds are macros representing commonly granted sets of ioctl commands.
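For orientation, the ioctl_has_perm() hunk later in this patch derives the lookup indices from the low 16 bits of the ioctl command. A standalone sketch of that split (the sample command value is illustrative):

#include <linux/types.h>

/* An ioctl command selects a 256-bit permission set (driver) and a bit
 * within that set (xperm), e.g. cmd 0x5401 (TCGETS) -> driver 0x54,
 * xperm 0x01. */
static void xperm_index(u16 cmd, u8 *driver, u8 *xperm)
{
	*driver = cmd >> 8;	/* which 256-bit permission set */
	*xperm = cmd & 0xff;	/* which bit within that set */
}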
When ioctl commands are omitted only the permissions are checked. This feature is intended to provide finer granularity for the ioctl permission that may be too imprecise. For example, the same driver may use ioctls to provide important and benign functionality such as driver version or socket type as well as dangerous capabilities such as debugging features, read/write/execute to physical memory or access to sensitive data. Per-command filtering provides a mechanism to reduce the attack surface of the kernel, and limit applications to the subset of commands required. The format of the policy binary has been modified to include ioctl commands, and the policy version number has been incremented to POLICYDB_VERSION_XPERMS_IOCTL=30 to account for the format change. The extended permissions logic is deliberately generic to allow components to be reused e.g. netlink filters Signed-off-by: Jeff Vander Stoep Acked-by: Nick Kralevich Signed-off-by: Paul Moore --- security/selinux/avc.c | 415 +++++++++++++++++++++++++++- security/selinux/hooks.c | 42 ++- security/selinux/include/avc.h | 6 + security/selinux/include/security.h | 32 ++- security/selinux/ss/avtab.c | 104 ++++++- security/selinux/ss/avtab.h | 33 ++- security/selinux/ss/conditional.c | 32 ++- security/selinux/ss/conditional.h | 6 +- security/selinux/ss/policydb.c | 5 + security/selinux/ss/services.c | 213 ++++++++++++-- security/selinux/ss/services.h | 6 + 11 files changed, 834 insertions(+), 60 deletions(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 3c17dda9571d4e..2d5e1b04cd5029 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ struct avc_entry { u32 tsid; u16 tclass; struct av_decision avd; + struct avc_xperms_node *xp_node; }; struct avc_node { @@ -56,6 +58,16 @@ struct avc_node { struct rcu_head rhead; }; +struct avc_xperms_decision_node { + struct extended_perms_decision xpd; + struct list_head xpd_list; /* list of extended_perms_decision */ +}; + +struct avc_xperms_node { + struct extended_perms xp; + struct list_head xpd_head; /* list head of extended_perms_decision */ +}; + struct avc_cache { struct hlist_head slots[AVC_CACHE_SLOTS]; /* head for avc_node->list */ spinlock_t slots_lock[AVC_CACHE_SLOTS]; /* lock for writes */ @@ -80,6 +92,9 @@ DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 }; static struct avc_cache avc_cache; static struct avc_callback_node *avc_callbacks; static struct kmem_cache *avc_node_cachep; +static struct kmem_cache *avc_xperms_data_cachep; +static struct kmem_cache *avc_xperms_decision_cachep; +static struct kmem_cache *avc_xperms_cachep; static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass) { @@ -170,7 +185,17 @@ void __init avc_init(void) atomic_set(&avc_cache.lru_hint, 0); avc_node_cachep = kmem_cache_create("avc_node", sizeof(struct avc_node), - 0, SLAB_PANIC, NULL); + 0, SLAB_PANIC, NULL); + avc_xperms_cachep = kmem_cache_create("avc_xperms_node", + sizeof(struct avc_xperms_node), + 0, SLAB_PANIC, NULL); + avc_xperms_decision_cachep = kmem_cache_create( + "avc_xperms_decision_node", + sizeof(struct avc_xperms_decision_node), + 0, SLAB_PANIC, NULL); + avc_xperms_data_cachep = kmem_cache_create("avc_xperms_data", + sizeof(struct extended_perms_data), + 0, SLAB_PANIC, NULL); audit_log(current->audit_context, GFP_KERNEL, AUDIT_KERNEL, "AVC INITIALIZED\n"); } @@ -205,9 +230,261 @@ int avc_get_hash_stats(char *page) slots_used, AVC_CACHE_SLOTS, 
max_chain_len); } +/* + * using a linked list for extended_perms_decision lookup because the list is + * always small. i.e. less than 5, typically 1 + */ +static struct extended_perms_decision *avc_xperms_decision_lookup(u8 driver, + struct avc_xperms_node *xp_node) +{ + struct avc_xperms_decision_node *xpd_node; + + list_for_each_entry(xpd_node, &xp_node->xpd_head, xpd_list) { + if (xpd_node->xpd.driver == driver) + return &xpd_node->xpd; + } + return NULL; +} + +static inline unsigned int +avc_xperms_has_perm(struct extended_perms_decision *xpd, + u8 perm, u8 which) +{ + unsigned int rc = 0; + + if ((which == XPERMS_ALLOWED) && + (xpd->used & XPERMS_ALLOWED)) + rc = security_xperm_test(xpd->allowed->p, perm); + else if ((which == XPERMS_AUDITALLOW) && + (xpd->used & XPERMS_AUDITALLOW)) + rc = security_xperm_test(xpd->auditallow->p, perm); + else if ((which == XPERMS_DONTAUDIT) && + (xpd->used & XPERMS_DONTAUDIT)) + rc = security_xperm_test(xpd->dontaudit->p, perm); + return rc; +} + +static void avc_xperms_allow_perm(struct avc_xperms_node *xp_node, + u8 driver, u8 perm) +{ + struct extended_perms_decision *xpd; + security_xperm_set(xp_node->xp.drivers.p, driver); + xpd = avc_xperms_decision_lookup(driver, xp_node); + if (xpd && xpd->allowed) + security_xperm_set(xpd->allowed->p, perm); +} + +static void avc_xperms_decision_free(struct avc_xperms_decision_node *xpd_node) +{ + struct extended_perms_decision *xpd; + + xpd = &xpd_node->xpd; + if (xpd->allowed) + kmem_cache_free(avc_xperms_data_cachep, xpd->allowed); + if (xpd->auditallow) + kmem_cache_free(avc_xperms_data_cachep, xpd->auditallow); + if (xpd->dontaudit) + kmem_cache_free(avc_xperms_data_cachep, xpd->dontaudit); + kmem_cache_free(avc_xperms_decision_cachep, xpd_node); +} + +static void avc_xperms_free(struct avc_xperms_node *xp_node) +{ + struct avc_xperms_decision_node *xpd_node, *tmp; + + if (!xp_node) + return; + + list_for_each_entry_safe(xpd_node, tmp, &xp_node->xpd_head, xpd_list) { + list_del(&xpd_node->xpd_list); + avc_xperms_decision_free(xpd_node); + } + kmem_cache_free(avc_xperms_cachep, xp_node); +} + +static void avc_copy_xperms_decision(struct extended_perms_decision *dest, + struct extended_perms_decision *src) +{ + dest->driver = src->driver; + dest->used = src->used; + if (dest->used & XPERMS_ALLOWED) + memcpy(dest->allowed->p, src->allowed->p, + sizeof(src->allowed->p)); + if (dest->used & XPERMS_AUDITALLOW) + memcpy(dest->auditallow->p, src->auditallow->p, + sizeof(src->auditallow->p)); + if (dest->used & XPERMS_DONTAUDIT) + memcpy(dest->dontaudit->p, src->dontaudit->p, + sizeof(src->dontaudit->p)); +} + +/* + * similar to avc_copy_xperms_decision, but only copy decision + * information relevant to this perm + */ +static inline void avc_quick_copy_xperms_decision(u8 perm, + struct extended_perms_decision *dest, + struct extended_perms_decision *src) +{ + /* + * compute index of the u32 of the 256 bits (8 u32s) that contain this + * command permission + */ + u8 i = perm >> 5; + + dest->used = src->used; + if (dest->used & XPERMS_ALLOWED) + dest->allowed->p[i] = src->allowed->p[i]; + if (dest->used & XPERMS_AUDITALLOW) + dest->auditallow->p[i] = src->auditallow->p[i]; + if (dest->used & XPERMS_DONTAUDIT) + dest->dontaudit->p[i] = src->dontaudit->p[i]; +} + +static struct avc_xperms_decision_node + *avc_xperms_decision_alloc(u8 which) +{ + struct avc_xperms_decision_node *xpd_node; + struct extended_perms_decision *xpd; + + xpd_node = kmem_cache_zalloc(avc_xperms_decision_cachep, + GFP_ATOMIC | 
__GFP_NOMEMALLOC); + if (!xpd_node) + return NULL; + + xpd = &xpd_node->xpd; + if (which & XPERMS_ALLOWED) { + xpd->allowed = kmem_cache_zalloc(avc_xperms_data_cachep, + GFP_ATOMIC | __GFP_NOMEMALLOC); + if (!xpd->allowed) + goto error; + } + if (which & XPERMS_AUDITALLOW) { + xpd->auditallow = kmem_cache_zalloc(avc_xperms_data_cachep, + GFP_ATOMIC | __GFP_NOMEMALLOC); + if (!xpd->auditallow) + goto error; + } + if (which & XPERMS_DONTAUDIT) { + xpd->dontaudit = kmem_cache_zalloc(avc_xperms_data_cachep, + GFP_ATOMIC | __GFP_NOMEMALLOC); + if (!xpd->dontaudit) + goto error; + } + return xpd_node; +error: + avc_xperms_decision_free(xpd_node); + return NULL; +} + +static int avc_add_xperms_decision(struct avc_node *node, + struct extended_perms_decision *src) +{ + struct avc_xperms_decision_node *dest_xpd; + + node->ae.xp_node->xp.len++; + dest_xpd = avc_xperms_decision_alloc(src->used); + if (!dest_xpd) + return -ENOMEM; + avc_copy_xperms_decision(&dest_xpd->xpd, src); + list_add(&dest_xpd->xpd_list, &node->ae.xp_node->xpd_head); + return 0; +} + +static struct avc_xperms_node *avc_xperms_alloc(void) +{ + struct avc_xperms_node *xp_node; + + xp_node = kmem_cache_zalloc(avc_xperms_cachep, + GFP_ATOMIC|__GFP_NOMEMALLOC); + if (!xp_node) + return xp_node; + INIT_LIST_HEAD(&xp_node->xpd_head); + return xp_node; +} + +static int avc_xperms_populate(struct avc_node *node, + struct avc_xperms_node *src) +{ + struct avc_xperms_node *dest; + struct avc_xperms_decision_node *dest_xpd; + struct avc_xperms_decision_node *src_xpd; + + if (src->xp.len == 0) + return 0; + dest = avc_xperms_alloc(); + if (!dest) + return -ENOMEM; + + memcpy(dest->xp.drivers.p, src->xp.drivers.p, sizeof(dest->xp.drivers.p)); + dest->xp.len = src->xp.len; + + /* for each source xpd allocate a destination xpd and copy */ + list_for_each_entry(src_xpd, &src->xpd_head, xpd_list) { + dest_xpd = avc_xperms_decision_alloc(src_xpd->xpd.used); + if (!dest_xpd) + goto error; + avc_copy_xperms_decision(&dest_xpd->xpd, &src_xpd->xpd); + list_add(&dest_xpd->xpd_list, &dest->xpd_head); + } + node->ae.xp_node = dest; + return 0; +error: + avc_xperms_free(dest); + return -ENOMEM; + +} + +static inline u32 avc_xperms_audit_required(u32 requested, + struct av_decision *avd, + struct extended_perms_decision *xpd, + u8 perm, + int result, + u32 *deniedp) +{ + u32 denied, audited; + + denied = requested & ~avd->allowed; + if (unlikely(denied)) { + audited = denied & avd->auditdeny; + if (audited && xpd) { + if (avc_xperms_has_perm(xpd, perm, XPERMS_DONTAUDIT)) + audited &= ~requested; + } + } else if (result) { + audited = denied = requested; + } else { + audited = requested & avd->auditallow; + if (audited && xpd) { + if (!avc_xperms_has_perm(xpd, perm, XPERMS_AUDITALLOW)) + audited &= ~requested; + } + } + + *deniedp = denied; + return audited; +} + +static inline int avc_xperms_audit(u32 ssid, u32 tsid, u16 tclass, + u32 requested, struct av_decision *avd, + struct extended_perms_decision *xpd, + u8 perm, int result, + struct common_audit_data *ad) +{ + u32 audited, denied; + + audited = avc_xperms_audit_required( + requested, avd, xpd, perm, result, &denied); + if (likely(!audited)) + return 0; + return slow_avc_audit(ssid, tsid, tclass, requested, + audited, denied, result, ad, 0); +} + static void avc_node_free(struct rcu_head *rhead) { struct avc_node *node = container_of(rhead, struct avc_node, rhead); + avc_xperms_free(node->ae.xp_node); kmem_cache_free(avc_node_cachep, node); avc_cache_stats_incr(frees); } @@ -221,6 +498,7 @@ static 
void avc_node_delete(struct avc_node *node) static void avc_node_kill(struct avc_node *node) { + avc_xperms_free(node->ae.xp_node); kmem_cache_free(avc_node_cachep, node); avc_cache_stats_incr(frees); atomic_dec(&avc_cache.active_nodes); @@ -367,6 +645,7 @@ static int avc_latest_notif_update(int seqno, int is_insert) * @tsid: target security identifier * @tclass: target security class * @avd: resulting av decision + * @xp_node: resulting extended permissions * * Insert an AVC entry for the SID pair * (@ssid, @tsid) and class @tclass. @@ -378,7 +657,9 @@ static int avc_latest_notif_update(int seqno, int is_insert) * the access vectors into a cache entry, returns * avc_node inserted. Otherwise, this function returns NULL. */ -static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd) +static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, + struct av_decision *avd, + struct avc_xperms_node *xp_node) { struct avc_node *pos, *node = NULL; int hvalue; @@ -391,10 +672,15 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_dec if (node) { struct hlist_head *head; spinlock_t *lock; + int rc = 0; hvalue = avc_hash(ssid, tsid, tclass); avc_node_populate(node, ssid, tsid, tclass, avd); - + rc = avc_xperms_populate(node, xp_node); + if (rc) { + kmem_cache_free(avc_node_cachep, node); + return NULL; + } head = &avc_cache.slots[hvalue]; lock = &avc_cache.slots_lock[hvalue]; @@ -523,14 +809,17 @@ int __init avc_add_callback(int (*callback)(u32 event), u32 events) * @perms : Permission mask bits * @ssid,@tsid,@tclass : identifier of an AVC entry * @seqno : sequence number when decision was made + * @xpd: extended_perms_decision to be added to the node * * if a valid AVC entry doesn't exist,this function returns -ENOENT. * if kmalloc() called internal returns NULL, this function returns -ENOMEM. * otherwise, this function updates the AVC entry. The original AVC-entry object * will release later by RCU. */ -static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass, - u32 seqno) +static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, + u32 tsid, u16 tclass, u32 seqno, + struct extended_perms_decision *xpd, + u32 flags) { int hvalue, rc = 0; unsigned long flag; @@ -574,9 +863,19 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass, avc_node_populate(node, ssid, tsid, tclass, &orig->ae.avd); + if (orig->ae.xp_node) { + rc = avc_xperms_populate(node, orig->ae.xp_node); + if (rc) { + kmem_cache_free(avc_node_cachep, node); + goto out_unlock; + } + } + switch (event) { case AVC_CALLBACK_GRANT: node->ae.avd.allowed |= perms; + if (node->ae.xp_node && (flags & AVC_EXTENDED_PERMS)) + avc_xperms_allow_perm(node->ae.xp_node, driver, xperm); break; case AVC_CALLBACK_TRY_REVOKE: case AVC_CALLBACK_REVOKE: @@ -594,6 +893,9 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass, case AVC_CALLBACK_AUDITDENY_DISABLE: node->ae.avd.auditdeny &= ~perms; break; + case AVC_CALLBACK_ADD_XPERMS: + avc_add_xperms_decision(node, xpd); + break; } avc_node_replace(node, orig); out_unlock: @@ -665,18 +967,20 @@ int avc_ss_reset(u32 seqno) * results in a bigger stack frame. 
*/ static noinline struct avc_node *avc_compute_av(u32 ssid, u32 tsid, - u16 tclass, struct av_decision *avd) + u16 tclass, struct av_decision *avd, + struct avc_xperms_node *xp_node) { rcu_read_unlock(); - security_compute_av(ssid, tsid, tclass, avd); + INIT_LIST_HEAD(&xp_node->xpd_head); + security_compute_av(ssid, tsid, tclass, avd, &xp_node->xp); rcu_read_lock(); - return avc_insert(ssid, tsid, tclass, avd); + return avc_insert(ssid, tsid, tclass, avd, xp_node); } static noinline int avc_denied(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - unsigned flags, - struct av_decision *avd) + u16 tclass, u32 requested, + u8 driver, u8 xperm, unsigned flags, + struct av_decision *avd) { if (flags & AVC_STRICT) return -EACCES; @@ -684,11 +988,91 @@ static noinline int avc_denied(u32 ssid, u32 tsid, if (selinux_enforcing && !(avd->flags & AVD_FLAGS_PERMISSIVE)) return -EACCES; - avc_update_node(AVC_CALLBACK_GRANT, requested, ssid, - tsid, tclass, avd->seqno); + avc_update_node(AVC_CALLBACK_GRANT, requested, driver, xperm, ssid, + tsid, tclass, avd->seqno, NULL, flags); return 0; } +/* + * The avc extended permissions logic adds an additional 256 bits of + * permissions to an avc node when extended permissions for that node are + * specified in the avtab. If the additional 256 permissions is not adequate, + * as-is the case with ioctls, then multiple may be chained together and the + * driver field is used to specify which set contains the permission. + */ +int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, + u8 driver, u8 xperm, struct common_audit_data *ad) +{ + struct avc_node *node; + struct av_decision avd; + u32 denied; + struct extended_perms_decision local_xpd; + struct extended_perms_decision *xpd = NULL; + struct extended_perms_data allowed; + struct extended_perms_data auditallow; + struct extended_perms_data dontaudit; + struct avc_xperms_node local_xp_node; + struct avc_xperms_node *xp_node; + int rc = 0, rc2; + + xp_node = &local_xp_node; + BUG_ON(!requested); + + rcu_read_lock(); + + node = avc_lookup(ssid, tsid, tclass); + if (unlikely(!node)) { + node = avc_compute_av(ssid, tsid, tclass, &avd, xp_node); + } else { + memcpy(&avd, &node->ae.avd, sizeof(avd)); + xp_node = node->ae.xp_node; + } + /* if extended permissions are not defined, only consider av_decision */ + if (!xp_node || !xp_node->xp.len) + goto decision; + + local_xpd.allowed = &allowed; + local_xpd.auditallow = &auditallow; + local_xpd.dontaudit = &dontaudit; + + xpd = avc_xperms_decision_lookup(driver, xp_node); + if (unlikely(!xpd)) { + /* + * Compute the extended_perms_decision only if the driver + * is flagged + */ + if (!security_xperm_test(xp_node->xp.drivers.p, driver)) { + avd.allowed &= ~requested; + goto decision; + } + rcu_read_unlock(); + security_compute_xperms_decision(ssid, tsid, tclass, driver, + &local_xpd); + rcu_read_lock(); + avc_update_node(AVC_CALLBACK_ADD_XPERMS, requested, driver, xperm, + ssid, tsid, tclass, avd.seqno, &local_xpd, 0); + } else { + avc_quick_copy_xperms_decision(xperm, &local_xpd, xpd); + } + xpd = &local_xpd; + + if (!avc_xperms_has_perm(xpd, xperm, XPERMS_ALLOWED)) + avd.allowed &= ~requested; + +decision: + denied = requested & ~(avd.allowed); + if (unlikely(denied)) + rc = avc_denied(ssid, tsid, tclass, requested, driver, xperm, + AVC_EXTENDED_PERMS, &avd); + + rcu_read_unlock(); + + rc2 = avc_xperms_audit(ssid, tsid, tclass, requested, + &avd, xpd, xperm, rc, ad); + if (rc2) + return rc2; + return rc; +} /** * avc_has_perm_noaudit - Check 
permissions but perform no auditing. @@ -716,6 +1100,7 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, struct av_decision *avd) { struct avc_node *node; + struct avc_xperms_node xp_node; int rc = 0; u32 denied; @@ -725,13 +1110,13 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, node = avc_lookup(ssid, tsid, tclass); if (unlikely(!node)) - node = avc_compute_av(ssid, tsid, tclass, avd); + node = avc_compute_av(ssid, tsid, tclass, avd, &xp_node); else memcpy(avd, &node->ae.avd, sizeof(*avd)); denied = requested & ~(avd->allowed); if (unlikely(denied)) - rc = avc_denied(ssid, tsid, tclass, requested, flags, avd); + rc = avc_denied(ssid, tsid, tclass, requested, 0, 0, flags, avd); rcu_read_unlock(); return rc; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 692e3cc8ce2393..a049b72162707a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3216,6 +3216,46 @@ static void selinux_file_free_security(struct file *file) file_free_security(file); } +/* + * Check whether a task has the ioctl permission and cmd + * operation to an inode. + */ +int ioctl_has_perm(const struct cred *cred, struct file *file, + u32 requested, u16 cmd) +{ + struct common_audit_data ad; + struct file_security_struct *fsec = file->f_security; + struct inode *inode = file_inode(file); + struct inode_security_struct *isec = inode->i_security; + struct lsm_ioctlop_audit ioctl; + u32 ssid = cred_sid(cred); + int rc; + u8 driver = cmd >> 8; + u8 xperm = cmd & 0xff; + + ad.type = LSM_AUDIT_DATA_IOCTL_OP; + ad.u.op = &ioctl; + ad.u.op->cmd = cmd; + ad.u.op->path = file->f_path; + + if (ssid != fsec->sid) { + rc = avc_has_perm(ssid, fsec->sid, + SECCLASS_FD, + FD__USE, + &ad); + if (rc) + goto out; + } + + if (unlikely(IS_PRIVATE(inode))) + return 0; + + rc = avc_has_extended_perms(ssid, isec->sid, isec->sclass, + requested, driver, xperm, &ad); +out: + return rc; +} + static int selinux_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3258,7 +3298,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, * to the file's ioctl() function. */ default: - error = file_has_perm(cred, file, FILE__IOCTL); + error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd); } return error; } diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index ddf8eec03f2117..db12ff14277b24 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -142,6 +142,7 @@ static inline int avc_audit(u32 ssid, u32 tsid, } #define AVC_STRICT 1 /* Ignore permissive mode. 
*/ +#define AVC_EXTENDED_PERMS 2 /* update extended permissions */ int avc_has_perm_noaudit(u32 ssid, u32 tsid, u16 tclass, u32 requested, unsigned flags, @@ -151,6 +152,10 @@ int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata); +int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, + u8 driver, u8 perm, struct common_audit_data *ad); + + u32 avc_policy_seqno(void); #define AVC_CALLBACK_GRANT 1 @@ -161,6 +166,7 @@ u32 avc_policy_seqno(void); #define AVC_CALLBACK_AUDITALLOW_DISABLE 32 #define AVC_CALLBACK_AUDITDENY_ENABLE 64 #define AVC_CALLBACK_AUDITDENY_DISABLE 128 +#define AVC_CALLBACK_ADD_XPERMS 256 int avc_add_callback(int (*callback)(u32 event), u32 events); diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 36993ad1c067a8..6a681d26bf20a6 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -35,13 +35,14 @@ #define POLICYDB_VERSION_NEW_OBJECT_DEFAULTS 27 #define POLICYDB_VERSION_DEFAULT_TYPE 28 #define POLICYDB_VERSION_CONSTRAINT_NAMES 29 +#define POLICYDB_VERSION_XPERMS_IOCTL 30 /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE #ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX #define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE #else -#define POLICYDB_VERSION_MAX POLICYDB_VERSION_CONSTRAINT_NAMES +#define POLICYDB_VERSION_MAX POLICYDB_VERSION_XPERMS_IOCTL #endif /* Mask for just the mount related flags */ @@ -109,11 +110,38 @@ struct av_decision { u32 flags; }; +#define XPERMS_ALLOWED 1 +#define XPERMS_AUDITALLOW 2 +#define XPERMS_DONTAUDIT 4 + +#define security_xperm_set(perms, x) (perms[x >> 5] |= 1 << (x & 0x1f)) +#define security_xperm_test(perms, x) (1 & (perms[x >> 5] >> (x & 0x1f))) +struct extended_perms_data { + u32 p[8]; +}; + +struct extended_perms_decision { + u8 used; + u8 driver; + struct extended_perms_data *allowed; + struct extended_perms_data *auditallow; + struct extended_perms_data *dontaudit; +}; + +struct extended_perms { + u16 len; /* length associated decision chain */ + struct extended_perms_data drivers; /* flag drivers that are used */ +}; + /* definitions of av_decision.flags */ #define AVD_FLAGS_PERMISSIVE 0x0001 void security_compute_av(u32 ssid, u32 tsid, - u16 tclass, struct av_decision *avd); + u16 tclass, struct av_decision *avd, + struct extended_perms *xperms); + +void security_compute_xperms_decision(u32 ssid, u32 tsid, u16 tclass, + u8 driver, struct extended_perms_decision *xpermd); void security_compute_av_user(u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd); diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index b64f2772b03019..3628d3a868b669 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -24,6 +24,7 @@ #include "policydb.h" static struct kmem_cache *avtab_node_cachep; +static struct kmem_cache *avtab_xperms_cachep; /* Based on MurmurHash3, written by Austin Appleby and placed in the * public domain. 
@@ -70,11 +71,24 @@ avtab_insert_node(struct avtab *h, int hvalue, struct avtab_key *key, struct avtab_datum *datum) { struct avtab_node *newnode; + struct avtab_extended_perms *xperms; newnode = kmem_cache_zalloc(avtab_node_cachep, GFP_KERNEL); if (newnode == NULL) return NULL; newnode->key = *key; - newnode->datum = *datum; + + if (key->specified & AVTAB_XPERMS) { + xperms = kmem_cache_zalloc(avtab_xperms_cachep, GFP_KERNEL); + if (xperms == NULL) { + kmem_cache_free(avtab_node_cachep, newnode); + return NULL; + } + *xperms = *(datum->u.xperms); + newnode->datum.u.xperms = xperms; + } else { + newnode->datum.u.data = datum->u.data; + } + if (prev) { newnode->next = prev->next; prev->next = newnode; @@ -107,8 +121,12 @@ static int avtab_insert(struct avtab *h, struct avtab_key *key, struct avtab_dat if (key->source_type == cur->key.source_type && key->target_type == cur->key.target_type && key->target_class == cur->key.target_class && - (specified & cur->key.specified)) + (specified & cur->key.specified)) { + /* extended perms may not be unique */ + if (specified & AVTAB_XPERMS) + break; return -EEXIST; + } if (key->source_type < cur->key.source_type) break; if (key->source_type == cur->key.source_type && @@ -271,6 +289,9 @@ void avtab_destroy(struct avtab *h) while (cur) { temp = cur; cur = cur->next; + if (temp->key.specified & AVTAB_XPERMS) + kmem_cache_free(avtab_xperms_cachep, + temp->datum.u.xperms); kmem_cache_free(avtab_node_cachep, temp); } } @@ -359,7 +380,10 @@ static uint16_t spec_order[] = { AVTAB_AUDITALLOW, AVTAB_TRANSITION, AVTAB_CHANGE, - AVTAB_MEMBER + AVTAB_MEMBER, + AVTAB_XPERMS_ALLOWED, + AVTAB_XPERMS_AUDITALLOW, + AVTAB_XPERMS_DONTAUDIT }; int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol, @@ -369,10 +393,11 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol, { __le16 buf16[4]; u16 enabled; - __le32 buf32[7]; u32 items, items2, val, vers = pol->policyvers; struct avtab_key key; struct avtab_datum datum; + struct avtab_extended_perms xperms; + __le32 buf32[ARRAY_SIZE(xperms.perms.p)]; int i, rc; unsigned set; @@ -429,11 +454,15 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol, printk(KERN_ERR "SELinux: avtab: entry has both access vectors and types\n"); return -EINVAL; } + if (val & AVTAB_XPERMS) { + printk(KERN_ERR "SELinux: avtab: entry has extended permissions\n"); + return -EINVAL; + } for (i = 0; i < ARRAY_SIZE(spec_order); i++) { if (val & spec_order[i]) { key.specified = spec_order[i] | enabled; - datum.data = le32_to_cpu(buf32[items++]); + datum.u.data = le32_to_cpu(buf32[items++]); rc = insertf(a, &key, &datum, p); if (rc) return rc; @@ -476,14 +505,42 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol, return -EINVAL; } - rc = next_entry(buf32, fp, sizeof(u32)); - if (rc) { - printk(KERN_ERR "SELinux: avtab: truncated entry\n"); - return rc; + if ((vers < POLICYDB_VERSION_XPERMS_IOCTL) && + (key.specified & AVTAB_XPERMS)) { + printk(KERN_ERR "SELinux: avtab: policy version %u does not " + "support extended permissions rules and one " + "was specified\n", vers); + return -EINVAL; + } else if (key.specified & AVTAB_XPERMS) { + memset(&xperms, 0, sizeof(struct avtab_extended_perms)); + rc = next_entry(&xperms.specified, fp, sizeof(u8)); + if (rc) { + printk(KERN_ERR "SELinux: avtab: truncated entry\n"); + return rc; + } + rc = next_entry(&xperms.driver, fp, sizeof(u8)); + if (rc) { + printk(KERN_ERR "SELinux: avtab: truncated entry\n"); + return rc; + } + rc = next_entry(buf32, fp, 
sizeof(u32)*ARRAY_SIZE(xperms.perms.p)); + if (rc) { + printk(KERN_ERR "SELinux: avtab: truncated entry\n"); + return rc; + } + for (i = 0; i < ARRAY_SIZE(xperms.perms.p); i++) + xperms.perms.p[i] = le32_to_cpu(buf32[i]); + datum.u.xperms = &xperms; + } else { + rc = next_entry(buf32, fp, sizeof(u32)); + if (rc) { + printk(KERN_ERR "SELinux: avtab: truncated entry\n"); + return rc; + } + datum.u.data = le32_to_cpu(*buf32); } - datum.data = le32_to_cpu(*buf32); if ((key.specified & AVTAB_TYPE) && - !policydb_type_isvalid(pol, datum.data)) { + !policydb_type_isvalid(pol, datum.u.data)) { printk(KERN_ERR "SELinux: avtab: invalid type\n"); return -EINVAL; } @@ -543,8 +600,9 @@ int avtab_read(struct avtab *a, void *fp, struct policydb *pol) int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp) { __le16 buf16[4]; - __le32 buf32[1]; + __le32 buf32[ARRAY_SIZE(cur->datum.u.xperms->perms.p)]; int rc; + unsigned int i; buf16[0] = cpu_to_le16(cur->key.source_type); buf16[1] = cpu_to_le16(cur->key.target_type); @@ -553,8 +611,22 @@ int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp) rc = put_entry(buf16, sizeof(u16), 4, fp); if (rc) return rc; - buf32[0] = cpu_to_le32(cur->datum.data); - rc = put_entry(buf32, sizeof(u32), 1, fp); + + if (cur->key.specified & AVTAB_XPERMS) { + rc = put_entry(&cur->datum.u.xperms->specified, sizeof(u8), 1, fp); + if (rc) + return rc; + rc = put_entry(&cur->datum.u.xperms->driver, sizeof(u8), 1, fp); + if (rc) + return rc; + for (i = 0; i < ARRAY_SIZE(cur->datum.u.xperms->perms.p); i++) + buf32[i] = cpu_to_le32(cur->datum.u.xperms->perms.p[i]); + rc = put_entry(buf32, sizeof(u32), + ARRAY_SIZE(cur->datum.u.xperms->perms.p), fp); + } else { + buf32[0] = cpu_to_le32(cur->datum.u.data); + rc = put_entry(buf32, sizeof(u32), 1, fp); + } if (rc) return rc; return 0; @@ -588,9 +660,13 @@ void avtab_cache_init(void) avtab_node_cachep = kmem_cache_create("avtab_node", sizeof(struct avtab_node), 0, SLAB_PANIC, NULL); + avtab_xperms_cachep = kmem_cache_create("avtab_extended_perms", + sizeof(struct avtab_extended_perms), + 0, SLAB_PANIC, NULL); } void avtab_cache_destroy(void) { kmem_cache_destroy(avtab_node_cachep); + kmem_cache_destroy(avtab_xperms_cachep); } diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h index adb451cd44f9d3..d946c9dc3c9ca6 100644 --- a/security/selinux/ss/avtab.h +++ b/security/selinux/ss/avtab.h @@ -23,6 +23,7 @@ #ifndef _SS_AVTAB_H_ #define _SS_AVTAB_H_ +#include "security.h" #include struct avtab_key { @@ -37,13 +38,43 @@ struct avtab_key { #define AVTAB_MEMBER 0x0020 #define AVTAB_CHANGE 0x0040 #define AVTAB_TYPE (AVTAB_TRANSITION | AVTAB_MEMBER | AVTAB_CHANGE) +/* extended permissions */ +#define AVTAB_XPERMS_ALLOWED 0x0100 +#define AVTAB_XPERMS_AUDITALLOW 0x0200 +#define AVTAB_XPERMS_DONTAUDIT 0x0400 +#define AVTAB_XPERMS (AVTAB_XPERMS_ALLOWED | \ + AVTAB_XPERMS_AUDITALLOW | \ + AVTAB_XPERMS_DONTAUDIT) #define AVTAB_ENABLED_OLD 0x80000000 /* reserved for used in cond_avtab */ #define AVTAB_ENABLED 0x8000 /* reserved for used in cond_avtab */ u16 specified; /* what field is specified */ }; +/* + * For operations that require more than the 32 permissions provided by the avc + * extended permissions may be used to provide 256 bits of permissions. + */ +struct avtab_extended_perms { +/* These are not flags. 
All 256 values may be used */ +#define AVTAB_XPERMS_IOCTLFUNCTION 0x01 +#define AVTAB_XPERMS_IOCTLDRIVER 0x02 + /* extension of the avtab_key specified */ + u8 specified; /* ioctl, netfilter, ... */ + /* + * if 256 bits is not adequate as is often the case with ioctls, then + * multiple extended perms may be used and the driver field + * specifies which permissions are included. + */ + u8 driver; + /* 256 bits of permissions */ + struct extended_perms_data perms; +}; + struct avtab_datum { - u32 data; /* access vector or type value */ + union { + u32 data; /* access vector or type value */ + struct avtab_extended_perms *xperms; + } u; }; struct avtab_node { diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 62c6773be0b75f..18643bf9894d5e 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -15,6 +15,7 @@ #include "security.h" #include "conditional.h" +#include "services.h" /* * cond_evaluate_expr evaluates a conditional expr @@ -612,21 +613,39 @@ int cond_write_list(struct policydb *p, struct cond_node *list, void *fp) return 0; } + +void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key, + struct extended_perms_decision *xpermd) +{ + struct avtab_node *node; + + if (!ctab || !key || !xpermd) + return; + + for (node = avtab_search_node(ctab, key); node; + node = avtab_search_node_next(node, key->specified)) { + if (node->key.specified & AVTAB_ENABLED) + services_compute_xperms_decision(xpermd, node); + } + return; + +} /* Determine whether additional permissions are granted by the conditional * av table, and if so, add them to the result */ -void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd) +void cond_compute_av(struct avtab *ctab, struct avtab_key *key, + struct av_decision *avd, struct extended_perms *xperms) { struct avtab_node *node; - if (!ctab || !key || !avd) + if (!ctab || !key || !avd || !xperms) return; for (node = avtab_search_node(ctab, key); node; node = avtab_search_node_next(node, key->specified)) { if ((u16)(AVTAB_ALLOWED|AVTAB_ENABLED) == (node->key.specified & (AVTAB_ALLOWED|AVTAB_ENABLED))) - avd->allowed |= node->datum.data; + avd->allowed |= node->datum.u.data; if ((u16)(AVTAB_AUDITDENY|AVTAB_ENABLED) == (node->key.specified & (AVTAB_AUDITDENY|AVTAB_ENABLED))) /* Since a '0' in an auditdeny mask represents a @@ -634,10 +653,13 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decisi * the '&' operand to ensure that all '0's in the mask * are retained (much unlike the allow and auditallow cases). 
*/ - avd->auditdeny &= node->datum.data; + avd->auditdeny &= node->datum.u.data; if ((u16)(AVTAB_AUDITALLOW|AVTAB_ENABLED) == (node->key.specified & (AVTAB_AUDITALLOW|AVTAB_ENABLED))) - avd->auditallow |= node->datum.data; + avd->auditallow |= node->datum.u.data; + if ((node->key.specified & AVTAB_ENABLED) && + (node->key.specified & AVTAB_XPERMS)) + services_compute_xperms_drivers(xperms, node); } return; } diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h index 4d1f87466508f7..ddb43e7e1c756d 100644 --- a/security/selinux/ss/conditional.h +++ b/security/selinux/ss/conditional.h @@ -73,8 +73,10 @@ int cond_read_list(struct policydb *p, void *fp); int cond_write_bool(void *key, void *datum, void *ptr); int cond_write_list(struct policydb *p, struct cond_node *list, void *fp); -void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd); - +void cond_compute_av(struct avtab *ctab, struct avtab_key *key, + struct av_decision *avd, struct extended_perms *xperms); +void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key, + struct extended_perms_decision *xpermd); int evaluate_cond_node(struct policydb *p, struct cond_node *node); #endif /* _CONDITIONAL_H_ */ diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 74aa224267c11f..992a3153082587 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -148,6 +148,11 @@ static struct policydb_compat_info policydb_compat[] = { .sym_num = SYM_NUM, .ocon_num = OCON_NUM, }, + { + .version = POLICYDB_VERSION_XPERMS_IOCTL, + .sym_num = SYM_NUM, + .ocon_num = OCON_NUM, + }, }; static struct policydb_compat_info *policydb_lookup_compat(int version) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 9e2d8207091536..b7df12ba61d839 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -93,9 +93,10 @@ static int context_struct_to_string(struct context *context, char **scontext, u32 *scontext_len); static void context_struct_compute_av(struct context *scontext, - struct context *tcontext, - u16 tclass, - struct av_decision *avd); + struct context *tcontext, + u16 tclass, + struct av_decision *avd, + struct extended_perms *xperms); struct selinux_mapping { u16 value; /* policy value */ @@ -565,7 +566,8 @@ static void type_attribute_bounds_av(struct context *scontext, context_struct_compute_av(&lo_scontext, tcontext, tclass, - &lo_avd); + &lo_avd, + NULL); if ((lo_avd.allowed & avd->allowed) == avd->allowed) return; /* no masked permission */ masked = ~lo_avd.allowed & avd->allowed; @@ -580,7 +582,8 @@ static void type_attribute_bounds_av(struct context *scontext, context_struct_compute_av(scontext, &lo_tcontext, tclass, - &lo_avd); + &lo_avd, + NULL); if ((lo_avd.allowed & avd->allowed) == avd->allowed) return; /* no masked permission */ masked = ~lo_avd.allowed & avd->allowed; @@ -596,7 +599,8 @@ static void type_attribute_bounds_av(struct context *scontext, context_struct_compute_av(&lo_scontext, &lo_tcontext, tclass, - &lo_avd); + &lo_avd, + NULL); if ((lo_avd.allowed & avd->allowed) == avd->allowed) return; /* no masked permission */ masked = ~lo_avd.allowed & avd->allowed; @@ -613,13 +617,39 @@ static void type_attribute_bounds_av(struct context *scontext, } /* - * Compute access vectors based on a context structure pair for - * the permissions in a particular class. 
+ * flag which drivers have permissions + * only looking for ioctl based extended permssions + */ +void services_compute_xperms_drivers( + struct extended_perms *xperms, + struct avtab_node *node) +{ + unsigned int i; + + if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLDRIVER) { + /* if one or more driver has all permissions allowed */ + for (i = 0; i < ARRAY_SIZE(xperms->drivers.p); i++) + xperms->drivers.p[i] |= node->datum.u.xperms->perms.p[i]; + } else if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLFUNCTION) { + /* if allowing permissions within a driver */ + security_xperm_set(xperms->drivers.p, + node->datum.u.xperms->driver); + } + + /* If no ioctl commands are allowed, ignore auditallow and auditdeny */ + if (node->key.specified & AVTAB_XPERMS_ALLOWED) + xperms->len = 1; +} + +/* + * Compute access vectors and extended permissions based on a context + * structure pair for the permissions in a particular class. */ static void context_struct_compute_av(struct context *scontext, - struct context *tcontext, - u16 tclass, - struct av_decision *avd) + struct context *tcontext, + u16 tclass, + struct av_decision *avd, + struct extended_perms *xperms) { struct constraint_node *constraint; struct role_allow *ra; @@ -633,6 +663,10 @@ static void context_struct_compute_av(struct context *scontext, avd->allowed = 0; avd->auditallow = 0; avd->auditdeny = 0xffffffff; + if (xperms) { + memset(&xperms->drivers, 0, sizeof(xperms->drivers)); + xperms->len = 0; + } if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) { if (printk_ratelimit()) @@ -647,7 +681,7 @@ static void context_struct_compute_av(struct context *scontext, * this permission check, then use it. */ avkey.target_class = tclass; - avkey.specified = AVTAB_AV; + avkey.specified = AVTAB_AV | AVTAB_XPERMS; sattr = flex_array_get(policydb.type_attr_map_array, scontext->type - 1); BUG_ON(!sattr); tattr = flex_array_get(policydb.type_attr_map_array, tcontext->type - 1); @@ -660,15 +694,18 @@ static void context_struct_compute_av(struct context *scontext, node; node = avtab_search_node_next(node, avkey.specified)) { if (node->key.specified == AVTAB_ALLOWED) - avd->allowed |= node->datum.data; + avd->allowed |= node->datum.u.data; else if (node->key.specified == AVTAB_AUDITALLOW) - avd->auditallow |= node->datum.data; + avd->auditallow |= node->datum.u.data; else if (node->key.specified == AVTAB_AUDITDENY) - avd->auditdeny &= node->datum.data; + avd->auditdeny &= node->datum.u.data; + else if (xperms && (node->key.specified & AVTAB_XPERMS)) + services_compute_xperms_drivers(xperms, node); } /* Check conditional av table for additional permissions */ - cond_compute_av(&policydb.te_cond_avtab, &avkey, avd); + cond_compute_av(&policydb.te_cond_avtab, &avkey, + avd, xperms); } } @@ -899,6 +936,139 @@ static void avd_init(struct av_decision *avd) avd->flags = 0; } +void services_compute_xperms_decision(struct extended_perms_decision *xpermd, + struct avtab_node *node) +{ + unsigned int i; + + if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLFUNCTION) { + if (xpermd->driver != node->datum.u.xperms->driver) + return; + } else if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLDRIVER) { + if (!security_xperm_test(node->datum.u.xperms->perms.p, + xpermd->driver)) + return; + } else { + BUG(); + } + + if (node->key.specified == AVTAB_XPERMS_ALLOWED) { + xpermd->used |= XPERMS_ALLOWED; + if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLDRIVER) { + memset(xpermd->allowed->p, 0xff, + 
sizeof(xpermd->allowed->p)); + } + if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLFUNCTION) { + for (i = 0; i < ARRAY_SIZE(xpermd->allowed->p); i++) + xpermd->allowed->p[i] |= + node->datum.u.xperms->perms.p[i]; + } + } else if (node->key.specified == AVTAB_XPERMS_AUDITALLOW) { + xpermd->used |= XPERMS_AUDITALLOW; + if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLDRIVER) { + memset(xpermd->auditallow->p, 0xff, + sizeof(xpermd->auditallow->p)); + } + if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLFUNCTION) { + for (i = 0; i < ARRAY_SIZE(xpermd->auditallow->p); i++) + xpermd->auditallow->p[i] |= + node->datum.u.xperms->perms.p[i]; + } + } else if (node->key.specified == AVTAB_XPERMS_DONTAUDIT) { + xpermd->used |= XPERMS_DONTAUDIT; + if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLDRIVER) { + memset(xpermd->dontaudit->p, 0xff, + sizeof(xpermd->dontaudit->p)); + } + if (node->datum.u.xperms->specified == AVTAB_XPERMS_IOCTLFUNCTION) { + for (i = 0; i < ARRAY_SIZE(xpermd->dontaudit->p); i++) + xpermd->dontaudit->p[i] |= + node->datum.u.xperms->perms.p[i]; + } + } else { + BUG(); + } +} + +void security_compute_xperms_decision(u32 ssid, + u32 tsid, + u16 orig_tclass, + u8 driver, + struct extended_perms_decision *xpermd) +{ + u16 tclass; + struct context *scontext, *tcontext; + struct avtab_key avkey; + struct avtab_node *node; + struct ebitmap *sattr, *tattr; + struct ebitmap_node *snode, *tnode; + unsigned int i, j; + + xpermd->driver = driver; + xpermd->used = 0; + memset(xpermd->allowed->p, 0, sizeof(xpermd->allowed->p)); + memset(xpermd->auditallow->p, 0, sizeof(xpermd->auditallow->p)); + memset(xpermd->dontaudit->p, 0, sizeof(xpermd->dontaudit->p)); + + read_lock(&policy_rwlock); + if (!ss_initialized) + goto allow; + + scontext = sidtab_search(&sidtab, ssid); + if (!scontext) { + printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + __func__, ssid); + goto out; + } + + tcontext = sidtab_search(&sidtab, tsid); + if (!tcontext) { + printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + __func__, tsid); + goto out; + } + + tclass = unmap_class(orig_tclass); + if (unlikely(orig_tclass && !tclass)) { + if (policydb.allow_unknown) + goto allow; + goto out; + } + + + if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) { + pr_warn_ratelimited("SELinux: Invalid class %hu\n", tclass); + goto out; + } + + avkey.target_class = tclass; + avkey.specified = AVTAB_XPERMS; + sattr = flex_array_get(policydb.type_attr_map_array, + scontext->type - 1); + BUG_ON(!sattr); + tattr = flex_array_get(policydb.type_attr_map_array, + tcontext->type - 1); + BUG_ON(!tattr); + ebitmap_for_each_positive_bit(sattr, snode, i) { + ebitmap_for_each_positive_bit(tattr, tnode, j) { + avkey.source_type = i + 1; + avkey.target_type = j + 1; + for (node = avtab_search_node(&policydb.te_avtab, &avkey); + node; + node = avtab_search_node_next(node, avkey.specified)) + services_compute_xperms_decision(xpermd, node); + + cond_compute_xperms(&policydb.te_cond_avtab, + &avkey, xpermd); + } + } +out: + read_unlock(&policy_rwlock); + return; +allow: + memset(xpermd->allowed->p, 0xff, sizeof(xpermd->allowed->p)); + goto out; +} /** * security_compute_av - Compute access vector decisions. 
@@ -906,6 +1076,7 @@ static void avd_init(struct av_decision *avd) * @tsid: target security identifier * @tclass: target security class * @avd: access vector decisions + * @xperms: extended permissions * * Compute a set of access vector decisions based on the * SID pair (@ssid, @tsid) for the permissions in @tclass. @@ -913,13 +1084,15 @@ static void avd_init(struct av_decision *avd) void security_compute_av(u32 ssid, u32 tsid, u16 orig_tclass, - struct av_decision *avd) + struct av_decision *avd, + struct extended_perms *xperms) { u16 tclass; struct context *scontext = NULL, *tcontext = NULL; read_lock(&policy_rwlock); avd_init(avd); + xperms->len = 0; if (!ss_initialized) goto allow; @@ -947,7 +1120,7 @@ void security_compute_av(u32 ssid, goto allow; goto out; } - context_struct_compute_av(scontext, tcontext, tclass, avd); + context_struct_compute_av(scontext, tcontext, tclass, avd, xperms); map_decision(orig_tclass, avd, policydb.allow_unknown); out: read_unlock(&policy_rwlock); @@ -993,7 +1166,7 @@ void security_compute_av_user(u32 ssid, goto out; } - context_struct_compute_av(scontext, tcontext, tclass, avd); + context_struct_compute_av(scontext, tcontext, tclass, avd, NULL); out: read_unlock(&policy_rwlock); return; @@ -1515,7 +1688,7 @@ static int security_compute_sid(u32 ssid, if (avdatum) { /* Use the type from the type transition/member/change rule. */ - newcontext.type = avdatum->data; + newcontext.type = avdatum->u.data; } /* if we have a objname this is a file trans check so check those rules */ diff --git a/security/selinux/ss/services.h b/security/selinux/ss/services.h index e8d907e903cdb1..6abcd8729ec3a6 100644 --- a/security/selinux/ss/services.h +++ b/security/selinux/ss/services.h @@ -11,5 +11,11 @@ extern struct policydb policydb; +void services_compute_xperms_drivers(struct extended_perms *xperms, + struct avtab_node *node); + +void services_compute_xperms_decision(struct extended_perms_decision *xpermd, + struct avtab_node *node); + #endif /* _SS_SERVICES_H_ */ From 9629d04ae06812f217846b69728c969afee690b4 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 10 Jul 2015 17:19:56 -0400 Subject: [PATCH 020/734] selinux: reduce locking overhead in inode_free_security() The inode_free_security() function just took the superblock's isec_lock before checking and trying to remove the inode security struct from the linked list. In many cases, the list was empty and so the lock taking is wasteful as no useful work is done. On multi-socket systems with a large number of CPUs, there can also be a fair amount of spinlock contention on the isec_lock if many tasks are exiting at the same time. This patch changes the code to check the state of the list first before taking the lock and attempting to dequeue it. The list_del_init() can be called more than once on the same list with no harm as long as they are properly serialized. It should not be possible to have inode_free_security() called concurrently with list_add(). For better safety, however, we use list_empty_careful() here even though it is still not completely safe in case that happens. 
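For reference, the reason the repeated call is harmless: list_del_init() re-initialises the entry to point at itself, so a second invocation merely unlinks a self-linked node (illustrative sketch, not part of the patch):

	list_del_init(&isec->list);	/* unlinks from the superblock list and re-inits the node */
	list_del_init(&isec->list);	/* safe no-op: prev/next already point back at the node */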
Signed-off-by: Waiman Long Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/hooks.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a049b72162707a..4de09f0227b4e5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -254,10 +254,21 @@ static void inode_free_security(struct inode *inode) struct inode_security_struct *isec = inode->i_security; struct superblock_security_struct *sbsec = inode->i_sb->s_security; - spin_lock(&sbsec->isec_lock); - if (!list_empty(&isec->list)) + /* + * As not all inode security structures are in a list, we check for + * empty list outside of the lock to make sure that we won't waste + * time taking a lock doing nothing. + * + * The list_del_init() function can be safely called more than once. + * It should not be possible for this function to be called with + * concurrent list_add(), but for better safety against future changes + * in the code, we use list_empty_careful() here. + */ + if (!list_empty_careful(&isec->list)) { + spin_lock(&sbsec->isec_lock); list_del_init(&isec->list); - spin_unlock(&sbsec->isec_lock); + spin_unlock(&sbsec->isec_lock); + } /* * The inode may still be referenced in a path walk and From 5dee25d08eac01472904b0ab32ce35edee5c0518 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 Jul 2015 17:19:57 -0400 Subject: [PATCH 021/734] selinux: initialize sock security class to default value Initialize the security class of sock security structures to the generic socket class. This is similar to what is already done in inode_alloc_security for files. Generally the sclass field will later be set by socket_post_create or sk_clone or sock_graft, but for protocol implementations that fail to call any of these for newly accepted sockets, we want some sane default that will yield a legitimate avc denied message with non-garbage values for class and permission. Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/hooks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4de09f0227b4e5..ef310f82717d77 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4559,6 +4559,7 @@ static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority sksec->peer_sid = SECINITSID_UNLABELED; sksec->sid = SECINITSID_UNLABELED; + sksec->sclass = SECCLASS_SOCKET; selinux_netlbl_sk_security_reset(sksec); sk->sk_security = sksec; From bd1741f4cf05d7709348f591d16eeb5f786de673 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 Jul 2015 17:19:57 -0400 Subject: [PATCH 022/734] selinux: Augment BUG_ON assertion for secclass_map. Ensure that we catch any cases where tclass == 0.
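secclass_map[] is indexed with tclass - 1, so a zero tclass would underflow the array; the strengthened assertion guards both ends of the range (sketch of the indexing being protected):

	BUG_ON(!tclass || tclass >= ARRAY_SIZE(secclass_map));
	perms = secclass_map[tclass - 1].perms;	/* tclass is 1-based, the map is 0-based */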
Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/avc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 2d5e1b04cd5029..324acc62f7e074 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -116,6 +116,7 @@ static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) return; } + BUG_ON(!tclass || tclass >= ARRAY_SIZE(secclass_map)); perms = secclass_map[tclass-1].perms; audit_log_format(ab, " {"); @@ -164,7 +165,7 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla kfree(scontext); } - BUG_ON(tclass >= ARRAY_SIZE(secclass_map)); + BUG_ON(!tclass || tclass >= ARRAY_SIZE(secclass_map)); audit_log_format(ab, " tclass=%s", secclass_map[tclass-1].name); } From c3c188b2c3ed29effe8693672ee1c84184103b4e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 Jul 2015 17:19:58 -0400 Subject: [PATCH 023/734] selinux: Create a common helper to determine an inode label [ver #3] Create a common helper function to determine the label for a new inode. This is then used by: - may_create() - selinux_dentry_init_security() - selinux_inode_init_security() This will change the behaviour of the functions slightly, bringing them all into line. Suggested-by: Stephen Smalley Signed-off-by: David Howells Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/hooks.c | 87 +++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 46 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ef310f82717d77..f4be0a11078811 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1709,6 +1709,32 @@ static int file_has_perm(const struct cred *cred, return rc; } +/* + * Determine the label for an inode that might be unioned. + */ +static int selinux_determine_inode_label(const struct inode *dir, + const struct qstr *name, + u16 tclass, + u32 *_new_isid) +{ + const struct superblock_security_struct *sbsec = dir->i_sb->s_security; + const struct inode_security_struct *dsec = dir->i_security; + const struct task_security_struct *tsec = current_security(); + + if ((sbsec->flags & SE_SBINITIALIZED) && + (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)) { + *_new_isid = sbsec->mntpoint_sid; + } else if ((sbsec->flags & SBLABEL_MNT) && + tsec->create_sid) { + *_new_isid = tsec->create_sid; + } else { + return security_transition_sid(tsec->sid, dsec->sid, tclass, + name, _new_isid); + } + + return 0; +} + /* Check whether a task can create a file. 
*/ static int may_create(struct inode *dir, struct dentry *dentry, @@ -1725,7 +1751,6 @@ static int may_create(struct inode *dir, sbsec = dir->i_sb->s_security; sid = tsec->sid; - newsid = tsec->create_sid; ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; @@ -1736,12 +1761,10 @@ static int may_create(struct inode *dir, if (rc) return rc; - if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { - rc = security_transition_sid(sid, dsec->sid, tclass, - &dentry->d_name, &newsid); - if (rc) - return rc; - } + rc = selinux_determine_inode_label(dir, &dentry->d_name, tclass, + &newsid); + if (rc) + return rc; rc = avc_has_perm(sid, newsid, tclass, FILE__CREATE, &ad); if (rc) @@ -2715,32 +2738,14 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode, struct qstr *name, void **ctx, u32 *ctxlen) { - const struct cred *cred = current_cred(); - struct task_security_struct *tsec; - struct inode_security_struct *dsec; - struct superblock_security_struct *sbsec; - struct inode *dir = d_backing_inode(dentry->d_parent); u32 newsid; int rc; - tsec = cred->security; - dsec = dir->i_security; - sbsec = dir->i_sb->s_security; - - if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { - newsid = tsec->create_sid; - } else { - rc = security_transition_sid(tsec->sid, dsec->sid, - inode_mode_to_security_class(mode), - name, - &newsid); - if (rc) { - printk(KERN_WARNING - "%s: security_transition_sid failed, rc=%d\n", - __func__, -rc); - return rc; - } - } + rc = selinux_determine_inode_label(d_inode(dentry->d_parent), name, + inode_mode_to_security_class(mode), + &newsid); + if (rc) + return rc; return security_sid_to_context(newsid, (char **)ctx, ctxlen); } @@ -2763,22 +2768,12 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, sid = tsec->sid; newsid = tsec->create_sid; - if ((sbsec->flags & SE_SBINITIALIZED) && - (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)) - newsid = sbsec->mntpoint_sid; - else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { - rc = security_transition_sid(sid, dsec->sid, - inode_mode_to_security_class(inode->i_mode), - qstr, &newsid); - if (rc) { - printk(KERN_WARNING "%s: " - "security_transition_sid failed, rc=%d (dev=%s " - "ino=%ld)\n", - __func__, - -rc, inode->i_sb->s_id, inode->i_ino); - return rc; - } - } + rc = selinux_determine_inode_label( + dir, qstr, + inode_mode_to_security_class(inode->i_mode), + &newsid); + if (rc) + return rc; /* Possibly defer initialization to selinux_complete_init. */ if (sbsec->flags & SE_SBINITIALIZED) { From fda4d578ed0a7e1d116f56a15efea0e4ba78acad Mon Sep 17 00:00:00 2001 From: Laurent Bigonville Date: Tue, 7 Jul 2015 23:10:52 +0200 Subject: [PATCH 024/734] selinux: explicitly declare the role "base_r" This fixes the compilation of policy generated by mdp with the recent version of checkpolicy. 
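With the extra declaration, the test policy emitted by mdp begins roughly as follows (excerpt reconstructed from the fprintf calls below):

	type base_t;
	role base_r;
	role base_r types { base_t };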
Signed-off-by: Laurent Bigonville Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- scripts/selinux/mdp/mdp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index 62b34ce1f50dd1..e10beb11b696e4 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -98,6 +98,7 @@ int main(int argc, char *argv[]) /* types, roles, and allows */ fprintf(fout, "type base_t;\n"); + fprintf(fout, "role base_r;\n"); fprintf(fout, "role base_r types { base_t };\n"); for (i = 0; secclass_map[i].name; i++) fprintf(fout, "allow base_t base_t:%s *;\n", From fa3eec7791b0fe27e3112804a71ba445ff336a6b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 1 Jul 2015 23:51:43 +0100 Subject: [PATCH 025/734] regmap: Silence warning on invalid zero length read Zero length reads make no sense in a regmap context and are likely to trigger bugs further down the stack so insert an error check, also silencing compiler warnings about use of ret in cases where we iterate per register. Reported-by: Russell King Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 8894b992043e04..9c1f856842a3ae 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2180,6 +2180,8 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, return -EINVAL; if (reg % map->reg_stride) return -EINVAL; + if (val_count == 0) + return -EINVAL; map->lock(map->lock_arg); From 8225d3853f34f6cf9caff15d8c385a528e0d7cb1 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Fri, 21 Nov 2014 10:06:01 -0500 Subject: [PATCH 026/734] seccomp: Replace smp_read_barrier_depends() with lockless_dereference() Recently lockless_dereference() was added which can be used in place of hard-coding smp_read_barrier_depends(). The following PATCH makes the change. Signed-off-by: Pranith Kumar Signed-off-by: Kees Cook --- kernel/seccomp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4f44028943e663..980fd26da22e05 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -175,17 +175,16 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) */ static u32 seccomp_run_filters(struct seccomp_data *sd) { - struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; + /* Make sure cross-thread synced filter points somewhere sane. */ + struct seccomp_filter *f = + lockless_dereference(current->seccomp.filter); /* Ensure unexpected behavior doesn't result in failing open. */ if (unlikely(WARN_ON(f == NULL))) return SECCOMP_RET_KILL; - /* Make sure cross-thread synced filter points somewhere sane. */ - smp_read_barrier_depends(); - if (!sd) { populate_seccomp_data(&sd_local); sd = &sd_local; From 13c4a90119d28cfcb6b5bdd820c233b86c2b0237 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Sat, 13 Jun 2015 09:02:48 -0600 Subject: [PATCH 027/734] seccomp: add ptrace options for suspend/resume This patch is the first step in enabling checkpoint/restore of processes with seccomp enabled. One of the things CRIU does while dumping tasks is inject code into them via ptrace to collect information that is only available to the process itself. However, if we are in a seccomp mode where these processes are prohibited from making these syscalls, then what CRIU does kills the task. 
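Concretely, a CRIU-style tracer performs roughly the sequence below; the ptrace option introduced by this patch (described in the next paragraph) is what lets the injected syscalls run unfiltered (hypothetical sketch, not CRIU's actual code):

	ptrace(PTRACE_ATTACH, pid, NULL, NULL);
	/* suspend the tracee's seccomp filters for the duration of the dump */
	ptrace(PTRACE_SETOPTIONS, pid, NULL, (void *)PTRACE_O_SUSPEND_SECCOMP);
	/* ... inject parasite code and collect state ... */
	ptrace(PTRACE_DETACH, pid, NULL, NULL);	/* filters resume on detach */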
This patch adds a new ptrace option, PTRACE_O_SUSPEND_SECCOMP, that enables a task from the init user namespace which has CAP_SYS_ADMIN and no seccomp filters to disable (and re-enable) seccomp filters for another task so that they can be successfully dumped (and restored). We restrict the set of processes that can disable seccomp through ptrace because although today ptrace can be used to bypass seccomp, there is some discussion of closing this loophole in the future and we would like this patch to not depend on that behavior and be future-proofed for when it is removed. Note that seccomp can be suspended before any filters are actually installed; this behavior is useful on criu restore, so that we can suspend seccomp, restore the filters, unmap our restore code from the restored process' address space, and then resume the task by detaching and have the filters resumed as well. v2 changes: * require that the tracer have no seccomp filters installed * drop TIF_NOTSC manipulation from the patch * change from ptrace command to a ptrace option and use this ptrace option as the flag to check. This means that as soon as the tracer detaches/dies, seccomp is re-enabled and as a corollary that one cannot disable seccomp across PTRACE_ATTACHs. v3 changes: * get rid of various #ifdefs everywhere * report more sensible errors when PTRACE_O_SUSPEND_SECCOMP is incorrectly used v4 changes: * get rid of may_suspend_seccomp() in favor of a capable() check in ptrace directly v5 changes: * check that seccomp is not enabled (or suspended) on the tracer Signed-off-by: Tycho Andersen CC: Will Drewry CC: Roland McGrath CC: Pavel Emelyanov CC: Serge E. Hallyn Acked-by: Oleg Nesterov Acked-by: Andy Lutomirski [kees: access seccomp.mode through seccomp_mode() instead] Signed-off-by: Kees Cook --- include/linux/ptrace.h | 1 + include/uapi/linux/ptrace.h | 6 ++++-- kernel/ptrace.c | 13 +++++++++++++ kernel/seccomp.c | 8 ++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 987a73a40ef846..061265f9287676 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -34,6 +34,7 @@ #define PT_TRACE_SECCOMP PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP) #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) +#define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) /* single stepping state bits (used on ARM and PA-RISC) */ #define PT_SINGLESTEP_BIT 31 diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index cf1019e15f5bc5..a7a69798661440 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -89,9 +89,11 @@ struct ptrace_peeksiginfo_args { #define PTRACE_O_TRACESECCOMP (1 << PTRACE_EVENT_SECCOMP) /* eventless options */ -#define PTRACE_O_EXITKILL (1 << 20) +#define PTRACE_O_EXITKILL (1 << 20) +#define PTRACE_O_SUSPEND_SECCOMP (1 << 21) -#define PTRACE_O_MASK (0x000000ff | PTRACE_O_EXITKILL) +#define PTRACE_O_MASK (\ + 0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP) #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index c8e0e050a36afb..787320de68e024 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -556,6 +556,19 @@ static int ptrace_setoptions(struct task_struct *child, unsigned long data) if (data & ~(unsigned long)PTRACE_O_MASK) return -EINVAL; + if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { + if (!config_enabled(CONFIG_CHECKPOINT_RESTORE) || + !config_enabled(CONFIG_SECCOMP)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if
(seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED || + current->ptrace & PT_SUSPEND_SECCOMP) + return -EPERM; + } + + /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 980fd26da22e05..645e42d6fa4d2e 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -590,6 +590,10 @@ void secure_computing_strict(int this_syscall) { int mode = current->seccomp.mode; + if (config_enabled(CONFIG_CHECKPOINT_RESTORE) && + unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) + return; + if (mode == 0) return; else if (mode == SECCOMP_MODE_STRICT) @@ -691,6 +695,10 @@ u32 seccomp_phase1(struct seccomp_data *sd) int this_syscall = sd ? sd->nr : syscall_get_nr(current, task_pt_regs(current)); + if (config_enabled(CONFIG_CHECKPOINT_RESTORE) && + unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) + return SECCOMP_PHASE1_OK; + switch (mode) { case SECCOMP_MODE_STRICT: __secure_computing_strict(this_syscall); /* may call do_exit */ From 221272f97ca528048a577a3ff23d7774286ca5fd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 15 Jun 2015 15:29:16 -0700 Subject: [PATCH 028/734] seccomp: swap hard-coded zeros to defined name For clarity, if CONFIG_SECCOMP isn't defined, seccomp_mode() is returning "disabled". This makes that more clear, along with another 0-use, and results in no operational change. Signed-off-by: Kees Cook --- include/linux/seccomp.h | 2 +- kernel/seccomp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index a19ddacdac30ae..f4265039a94c8f 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -78,7 +78,7 @@ static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3) static inline int seccomp_mode(struct seccomp *s) { - return 0; + return SECCOMP_MODE_DISABLED; } #endif /* CONFIG_SECCOMP */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 645e42d6fa4d2e..383bd6caca815d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -594,7 +594,7 @@ void secure_computing_strict(int this_syscall) unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) return; - if (mode == 0) + if (mode == SECCOMP_MODE_DISABLED) return; else if (mode == SECCOMP_MODE_STRICT) __secure_computing_strict(this_syscall); From 2de9d6006c190bb0f706e8404de94cd94293801f Mon Sep 17 00:00:00 2001 From: Nariman Poushin Date: Thu, 16 Jul 2015 16:36:22 +0100 Subject: [PATCH 029/734] regmap: Apply optional delay in multi_reg_write/register_patch Add an optional delay_us field in reg_sequence to allow the client to specify a delay (in microseconds) to be applied after any given write in a sequence of writes.
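As a usage sketch (hypothetical client code, assuming a driver already holding a valid struct regmap *map):

	static const struct reg_sequence init_seq[] = {
		{ 0x00, 0x1234 },
		{ 0x01, 0xabcd, 50 },	/* apply a 50us delay after this write */
		{ 0x02, 0x0001 },
	};

	ret = regmap_multi_reg_write(map, init_seq, ARRAY_SIZE(init_seq));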
We treat a delay in a sequence the same way we treat a page change as they are logically similar in that you can coalesce all writes before a delay (in the same way you can coalesce all writes before a page change is needed). Signed-off-by: Nariman Poushin Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 54 ++++++++++++++++++++++++++++++++---- include/linux/regmap.h | 5 +++- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 2cbb4502747d83..b3a5aa5cd580c0 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -18,6 +18,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -1807,10 +1808,12 @@ static int _regmap_range_multi_paged_reg_write(struct regmap *map, int i, n; struct reg_sequence *base; unsigned int this_page = 0; + unsigned int page_change = 0; /* * the set of registers are not neccessarily in order, but * since the order of write must be preserved this algorithm - * chops the set each time the page changes + * chops the set each time the page changes. This also applies + * if there is a delay required at any point in the sequence. */ base = regs; for (i = 0, n = 0; i < num_regs; i++, n++) { @@ -1826,16 +1829,48 @@ static int _regmap_range_multi_paged_reg_write(struct regmap *map, this_page = win_page; if (win_page != this_page) { this_page = win_page; + page_change = 1; + } + } + + /* If we have both a page change and a delay make sure to + * write the regs and apply the delay before we change the + * page. + */ + + if (page_change || regs[i].delay_us) { + + /* For situations where the first write requires + * a delay we need to make sure we don't call + * raw_multi_reg_write with n=0 + * This can't occur with page breaks as we + * never write on the first iteration + */ + if (regs[i].delay_us && i == 0) + n = 1; + ret = _regmap_raw_multi_reg_write(map, base, n); if (ret != 0) return ret; + + if (regs[i].delay_us) + udelay(regs[i].delay_us); + base += n; n = 0; - } - ret = _regmap_select_page(map, &base[n].reg, range, 1); - if (ret != 0) - return ret; + + if (page_change) { + ret = _regmap_select_page(map, + &base[n].reg, + range, 1); + if (ret != 0) + return ret; + + page_change = 0; + } + } + } if (n > 0) return _regmap_raw_multi_reg_write(map, base, n); @@ -1854,6 +1889,9 @@ static int _regmap_multi_reg_write(struct regmap *map, ret = _regmap_write(map, regs[i].reg, regs[i].def); if (ret != 0) return ret; + + if (regs[i].delay_us) + udelay(regs[i].delay_us); } return 0; } @@ -1893,8 +1931,12 @@ static int _regmap_multi_reg_write(struct regmap *map, for (i = 0; i < num_regs; i++) { unsigned int reg = regs[i].reg; struct regmap_range_node *range; + + /* Coalesce all the writes between a page break or a delay + * in a sequence + */ range = _regmap_range_lookup(map, reg); - if (range) { + if (range || regs[i].delay_us) { size_t len = sizeof(struct reg_sequence)*num_regs; struct reg_sequence *base = kmemdup(regs, len, GFP_KERNEL); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index c9ef2ec69142e2..5a7cf2136c81fd 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -51,14 +51,17 @@ struct reg_default { }; /** - * Register/value pairs for sequences of writes + * Register/value pairs for sequences of writes with an optional delay in + * microseconds to be applied after each write. * * @reg: Register address. * @def: Register value.
+ * @delay_us: Delay to be applied after the register write in microseconds */ struct reg_sequence { unsigned int reg; unsigned int def; + unsigned int delay_us; }; #ifdef CONFIG_REGMAP From b2c0b2cbb282f0cf42518ffacbe197e6f2884168 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Sep 2014 23:57:13 +0100 Subject: [PATCH 030/734] nmi: create generic NMI backtrace implementation x86's NMI backtrace implementation (for arch_trigger_all_cpu_backtrace()) is fairly generic in nature - the only architecture specific bits are the act of raising the NMI to other CPUs, and reporting the status of the NMI handler. These are fairly simple to factor out, and produce a generic implementation which can be shared between ARM and x86. Reviewed-by: Thomas Gleixner Signed-off-by: Russell King --- include/linux/nmi.h | 6 ++ lib/Makefile | 2 +- lib/nmi_backtrace.c | 162 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 lib/nmi_backtrace.c diff --git a/include/linux/nmi.h b/include/linux/nmi.h index f94da0e65dea90..5791e3229068f7 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -49,6 +49,12 @@ static inline bool trigger_allbutself_cpu_backtrace(void) arch_trigger_all_cpu_backtrace(false); return true; } + +/* generic implementation */ +void nmi_trigger_all_cpu_backtrace(bool include_self, + void (*raise)(cpumask_t *mask)); +bool nmi_cpu_backtrace(struct pt_regs *regs); + #else static inline bool trigger_all_cpu_backtrace(void) { diff --git a/lib/Makefile b/lib/Makefile index 6897b527581a8d..392169c5bc4eda 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o argv_split.o \ proportions.o flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o + earlycpio.o seq_buf.o nmi_backtrace.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c new file mode 100644 index 00000000000000..88d3d32e59236b --- /dev/null +++ b/lib/nmi_backtrace.c @@ -0,0 +1,162 @@ +/* + * NMI backtrace support + * + * Gratuitously copied from arch/x86/kernel/apic/hw_nmi.c by Russell King, + * with the following header: + * + * HW NMI watchdog support + * + * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. + * + * Arch specific calls to support NMI watchdog + * + * Bits copied from original nmi.c file + */ +#include +#include +#include +#include +#include + +#ifdef arch_trigger_all_cpu_backtrace +/* For reliability, we're prepared to waste bits here.
*/ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; +static cpumask_t printtrace_mask; + +#define NMI_BUF_SIZE 4096 + +struct nmi_seq_buf { + unsigned char buffer[NMI_BUF_SIZE]; + struct seq_buf seq; +}; + +/* Safe printing in NMI context */ +static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); + +/* "in progress" flag of arch_trigger_all_cpu_backtrace */ +static unsigned long backtrace_flag; + +static void print_seq_line(struct nmi_seq_buf *s, int start, int end) +{ + const char *buf = s->buffer + start; + + printk("%.*s", (end - start) + 1, buf); +} + +void nmi_trigger_all_cpu_backtrace(bool include_self, + void (*raise)(cpumask_t *mask)) +{ + struct nmi_seq_buf *s; + int i, cpu, this_cpu = get_cpu(); + + if (test_and_set_bit(0, &backtrace_flag)) { + /* + * If there is already a trigger_all_cpu_backtrace() in progress + * (backtrace_flag == 1), don't output double cpu dump infos. + */ + put_cpu(); + return; + } + + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + if (!include_self) + cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); + + cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask)); + + /* + * Set up per_cpu seq_buf buffers that the NMIs running on the other + * CPUs will write to. + */ + for_each_cpu(cpu, to_cpumask(backtrace_mask)) { + s = &per_cpu(nmi_print_seq, cpu); + seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); + } + + if (!cpumask_empty(to_cpumask(backtrace_mask))) { + pr_info("Sending NMI to %s CPUs:\n", + (include_self ? "all" : "other")); + raise(to_cpumask(backtrace_mask)); + } + + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpumask_empty(to_cpumask(backtrace_mask))) + break; + mdelay(1); + touch_softlockup_watchdog(); + } + + /* + * Now that all the NMIs have triggered, we can dump out their + * back traces safely to the console. + */ + for_each_cpu(cpu, &printtrace_mask) { + int len, last_i = 0; + + s = &per_cpu(nmi_print_seq, cpu); + len = seq_buf_used(&s->seq); + if (!len) + continue; + + /* Print line by line. */ + for (i = 0; i < len; i++) { + if (s->buffer[i] == '\n') { + print_seq_line(s, last_i, i); + last_i = i + 1; + } + } + /* Check if there was a partial line. */ + if (last_i < len) { + print_seq_line(s, last_i, len - 1); + pr_cont("\n"); + } + } + + clear_bit(0, &backtrace_flag); + smp_mb__after_atomic(); + put_cpu(); +} + +/* + * It is not safe to call printk() directly from NMI handlers. + * It may be fine if the NMI detected a lock up and we have no choice + * but to do so, but doing a NMI on all other CPUs to get a back trace + * can be done with a sysrq-l. We don't want that to lock up, which + * can happen if the NMI interrupts a printk in progress. + * + * Instead, we redirect the vprintk() to this nmi_vprintk() that writes + * the content into a per cpu seq_buf buffer. Then when the NMIs are + * all done, we can safely dump the contents of the seq_buf to a printk() + * from a non NMI context. 
+ */ +static int nmi_vprintk(const char *fmt, va_list args) +{ + struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + unsigned int len = seq_buf_used(&s->seq); + + seq_buf_vprintf(&s->seq, fmt, args); + return seq_buf_used(&s->seq) - len; +} + +bool nmi_cpu_backtrace(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { + printk_func_t printk_func_save = this_cpu_read(printk_func); + + /* Replace printk to write into the NMI seq */ + this_cpu_write(printk_func, nmi_vprintk); + pr_warn("NMI backtrace for cpu %d\n", cpu); + show_regs(regs); + this_cpu_write(printk_func, printk_func_save); + + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + return true; + } + + return false; +} +NOKPROBE_SYMBOL(nmi_cpu_backtrace); +#endif From 4d7489ffba0aef4d2c708b6ff1428efd6ccf41df Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 10 Jul 2015 21:47:36 +0100 Subject: [PATCH 031/734] nmi: x86: convert to generic nmi handler Convert x86 to use the generic nmi handler code which can be shared between architectures. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: Russell King --- arch/x86/kernel/apic/hw_nmi.c | 133 +--------------------------------- 1 file changed, 4 insertions(+), 129 deletions(-) diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 6873ab925d00ab..045e424fb3680f 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -28,146 +28,21 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) #endif #ifdef arch_trigger_all_cpu_backtrace -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -static cpumask_t printtrace_mask; - -#define NMI_BUF_SIZE 4096 - -struct nmi_seq_buf { - unsigned char buffer[NMI_BUF_SIZE]; - struct seq_buf seq; -}; - -/* Safe printing in NMI context */ -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); - -/* "in progress" flag of arch_trigger_all_cpu_backtrace */ -static unsigned long backtrace_flag; - -static void print_seq_line(struct nmi_seq_buf *s, int start, int end) +static void nmi_raise_cpu_backtrace(cpumask_t *mask) { - const char *buf = s->buffer + start; - - printk("%.*s", (end - start) + 1, buf); + apic->send_IPI_mask(mask, NMI_VECTOR); } void arch_trigger_all_cpu_backtrace(bool include_self) { - struct nmi_seq_buf *s; - int len; - int cpu; - int i; - int this_cpu = get_cpu(); - - if (test_and_set_bit(0, &backtrace_flag)) { - /* - * If there is already a trigger_all_cpu_backtrace() in progress - * (backtrace_flag == 1), don't output double cpu dump infos. - */ - put_cpu(); - return; - } - - cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); - if (!include_self) - cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); - - cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask)); - /* - * Set up per_cpu seq_buf buffers that the NMIs running on the other - * CPUs will write to. - */ - for_each_cpu(cpu, to_cpumask(backtrace_mask)) { - s = &per_cpu(nmi_print_seq, cpu); - seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); - } - - if (!cpumask_empty(to_cpumask(backtrace_mask))) { - pr_info("sending NMI to %s CPUs:\n", - (include_self ? 
"all" : "other")); - apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR); - } - - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(to_cpumask(backtrace_mask))) - break; - mdelay(1); - touch_softlockup_watchdog(); - } - - /* - * Now that all the NMIs have triggered, we can dump out their - * back traces safely to the console. - */ - for_each_cpu(cpu, &printtrace_mask) { - int last_i = 0; - - s = &per_cpu(nmi_print_seq, cpu); - len = seq_buf_used(&s->seq); - if (!len) - continue; - - /* Print line by line. */ - for (i = 0; i < len; i++) { - if (s->buffer[i] == '\n') { - print_seq_line(s, last_i, i); - last_i = i + 1; - } - } - /* Check if there was a partial line. */ - if (last_i < len) { - print_seq_line(s, last_i, len - 1); - pr_cont("\n"); - } - } - - clear_bit(0, &backtrace_flag); - smp_mb__after_atomic(); - put_cpu(); -} - -/* - * It is not safe to call printk() directly from NMI handlers. - * It may be fine if the NMI detected a lock up and we have no choice - * but to do so, but doing a NMI on all other CPUs to get a back trace - * can be done with a sysrq-l. We don't want that to lock up, which - * can happen if the NMI interrupts a printk in progress. - * - * Instead, we redirect the vprintk() to this nmi_vprintk() that writes - * the content into a per cpu seq_buf buffer. Then when the NMIs are - * all done, we can safely dump the contents of the seq_buf to a printk() - * from a non NMI context. - */ -static int nmi_vprintk(const char *fmt, va_list args) -{ - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - unsigned int len = seq_buf_used(&s->seq); - - seq_buf_vprintf(&s->seq, fmt, args); - return seq_buf_used(&s->seq) - len; + nmi_trigger_all_cpu_backtrace(include_self, nmi_raise_cpu_backtrace); } static int arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { - int cpu; - - cpu = smp_processor_id(); - - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - printk_func_t printk_func_save = this_cpu_read(printk_func); - - /* Replace printk to write into the NMI seq */ - this_cpu_write(printk_func, nmi_vprintk); - printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); - show_regs(regs); - this_cpu_write(printk_func, printk_func_save); - - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + if (nmi_cpu_backtrace(regs)) return NMI_HANDLED; - } return NMI_DONE; } From 96f0e00378d4a1fc1b79933ef84e1595015de808 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Sep 2014 23:57:13 +0100 Subject: [PATCH 032/734] ARM: add basic support for on-demand backtrace of other CPUs As we now have generic infrastructure to support backtracing of other CPUs in the system on lockups, we can start to implement this for ARM. Initially, we add an IPI based implementation, as the GIC code needs modification to support the generation of FIQ IPIs, and not all ARM platforms have the ability to raise a FIQ in the non-secure world. This provides us with a "best efforts" implementation in the absence of FIQs. 
Signed-off-by: Russell King --- arch/arm/include/asm/irq.h | 5 +++++ arch/arm/kernel/smp.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index 53c15dec7af6aa..be1d07d59ee978 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -35,6 +35,11 @@ extern void (*handle_arch_irq)(struct pt_regs *); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); #endif +#ifdef CONFIG_SMP +extern void arch_trigger_all_cpu_backtrace(bool); +#define arch_trigger_all_cpu_backtrace(x) arch_trigger_all_cpu_backtrace(x) +#endif + #endif #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 90dfbedfbfb852..3a20c386fd33f7 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,7 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_IRQ_WORK, IPI_COMPLETION, + IPI_CPU_BACKTRACE = 15, }; static DECLARE_COMPLETION(cpu_running); @@ -630,6 +632,12 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_CPU_BACKTRACE: + irq_enter(); + nmi_cpu_backtrace(regs); + irq_exit(); + break; + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); @@ -724,3 +732,13 @@ static int __init register_cpufreq_notifier(void) core_initcall(register_cpufreq_notifier); #endif + +static void raise_nmi(cpumask_t *mask) +{ + smp_cross_call(mask, IPI_CPU_BACKTRACE); +} + +void arch_trigger_all_cpu_backtrace(bool include_self) +{ + nmi_trigger_all_cpu_backtrace(include_self, raise_nmi); +} From 0642ef6f2992eba46c41abb5ceb7d4fa14ba888e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 23 Jun 2015 14:32:54 +0100 Subject: [PATCH 033/734] debugfs: Export bool read/write functions The file read/write functions for bools have no special dependencies on debugfs internals and are sufficiently non-trivial to be worth exporting so clients can re-use the implementation. 
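A client can then drop the helpers straight into its own file_operations, wrapping whichever side needs extra work (hypothetical driver sketch; my_bool_write is an assumed wrapper around debugfs_write_file_bool):

	static const struct file_operations my_bool_fops = {
		.open	= simple_open,
		.read	= debugfs_read_file_bool,	/* reuse the exported helper */
		.write	= my_bool_write,		/* custom logic around debugfs_write_file_bool */
		.llseek	= default_llseek,
	};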
Signed-off-by: Richard Fitzgerald Acked-by: Greg Kroah-Hartman Signed-off-by: Mark Brown --- fs/debugfs/file.c | 14 ++++++++------ include/linux/debugfs.h | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 284f9aa0028b8d..6c55ade071c39d 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -435,8 +435,8 @@ struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_atomic_t); -static ssize_t read_file_bool(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) { char buf[3]; u32 *val = file->private_data; @@ -449,9 +449,10 @@ static ssize_t read_file_bool(struct file *file, char __user *user_buf, buf[2] = 0x00; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } +EXPORT_SYMBOL_GPL(debugfs_read_file_bool); -static ssize_t write_file_bool(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) +ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) { char buf[32]; size_t buf_size; @@ -468,10 +469,11 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf, return count; } +EXPORT_SYMBOL_GPL(debugfs_write_file_bool); static const struct file_operations fops_bool = { - .read = read_file_bool, - .write = write_file_bool, + .read = debugfs_read_file_bool, + .write = debugfs_write_file_bool, .open = simple_open, .llseek = default_llseek, }; diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 420311bcee38c2..9beb636b97ebcd 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -116,6 +116,12 @@ struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name, bool debugfs_initialized(void); +ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos); + +ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos); + #else #include @@ -282,6 +288,20 @@ static inline struct dentry *debugfs_create_devm_seqfile(struct device *dev, return ERR_PTR(-ENODEV); } +static inline ssize_t debugfs_read_file_bool(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t debugfs_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + #endif #endif From d3dc5430d68fb91a62d971648170b34d46ab85bc Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 23 Jun 2015 14:32:55 +0100 Subject: [PATCH 034/734] regmap: debugfs: Allow writes to cache state settings Allow the user to write the cache_only and cache_bypass settings. This can be useful for debugging. Since this can lead to the hardware getting out-of-sync with the cache, at least for the period that the cache state is forced, the kernel is tainted and the action is recorded in the kernel log. When disabling cache_only through debugfs a cache sync will be performed. 
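In-kernel, clearing cache_only through this file behaves roughly like the following regcache sequence (sketch; the debugfs path manipulates the flag under the map lock rather than calling regcache_cache_only() directly):

	regcache_cache_only(map, false);	/* stop short-circuiting accesses to the cache */
	err = regcache_sync(map);		/* write back anything the hardware missed */
	if (err)
		dev_err(map->dev, "Failed to sync cache %d\n", err);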
Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-debugfs.c | 90 ++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 4 deletions(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 5799a0b9e6cc41..6a61e4fa73a28a 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -469,6 +469,87 @@ static const struct file_operations regmap_access_fops = { .llseek = default_llseek, }; +static ssize_t regmap_cache_only_write_file(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct regmap *map = container_of(file->private_data, + struct regmap, cache_only); + ssize_t result; + bool was_enabled, require_sync = false; + int err; + + map->lock(map->lock_arg); + + was_enabled = map->cache_only; + + result = debugfs_write_file_bool(file, user_buf, count, ppos); + if (result < 0) { + map->unlock(map->lock_arg); + return result; + } + + if (map->cache_only && !was_enabled) { + dev_warn(map->dev, "debugfs cache_only=Y forced\n"); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } else if (!map->cache_only && was_enabled) { + dev_warn(map->dev, "debugfs cache_only=N forced: syncing cache\n"); + require_sync = true; + } + + map->unlock(map->lock_arg); + + if (require_sync) { + err = regcache_sync(map); + if (err) + dev_err(map->dev, "Failed to sync cache %d\n", err); + } + + return result; +} + +static const struct file_operations regmap_cache_only_fops = { + .open = simple_open, + .read = debugfs_read_file_bool, + .write = regmap_cache_only_write_file, +}; + +static ssize_t regmap_cache_bypass_write_file(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct regmap *map = container_of(file->private_data, + struct regmap, cache_bypass); + ssize_t result; + bool was_enabled; + + map->lock(map->lock_arg); + + was_enabled = map->cache_bypass; + + result = debugfs_write_file_bool(file, user_buf, count, ppos); + if (result < 0) + goto out; + + if (map->cache_bypass && !was_enabled) { + dev_warn(map->dev, "debugfs cache_bypass=Y forced\n"); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } else if (!map->cache_bypass && was_enabled) { + dev_warn(map->dev, "debugfs cache_bypass=N forced\n"); + } + +out: + map->unlock(map->lock_arg); + + return result; +} + +static const struct file_operations regmap_cache_bypass_fops = { + .open = simple_open, + .read = debugfs_read_file_bool, + .write = regmap_cache_bypass_write_file, +}; + void regmap_debugfs_init(struct regmap *map, const char *name) { struct rb_node *next; @@ -530,12 +611,13 @@ void regmap_debugfs_init(struct regmap *map, const char *name) } if (map->cache_type) { - debugfs_create_bool("cache_only", 0400, map->debugfs, - &map->cache_only); + debugfs_create_file("cache_only", 0600, map->debugfs, + &map->cache_only, ®map_cache_only_fops); debugfs_create_bool("cache_dirty", 0400, map->debugfs, &map->cache_dirty); - debugfs_create_bool("cache_bypass", 0400, map->debugfs, - &map->cache_bypass); + debugfs_create_file("cache_bypass", 0600, map->debugfs, + &map->cache_bypass, + ®map_cache_bypass_fops); } next = rb_first(&map->range_tree); From 9fe6b778ca93e6171dbb8e54df557a278a91abea Mon Sep 17 00:00:00 2001 From: Gil Fruchter Date: Tue, 9 Jun 2015 10:32:34 +0300 Subject: [PATCH 035/734] tracing: Prefer kcalloc over kzalloc with multiply Use kcalloc for allocating an array instead of kzalloc with multiply, as that is what kcalloc is used for. Found with checkpatch. 
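Besides matching intent, kcalloc() also checks the count * size multiplication for overflow, which the open-coded kzalloc() form does not (illustrative comparison):

	buf = kzalloc(n * sizeof(*buf), GFP_KERNEL);	/* unchecked multiply */
	buf = kcalloc(n, sizeof(*buf), GFP_KERNEL);	/* overflow-checked, same zeroed result */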
Link: http://lkml.kernel.org/r/1433835155-6894-2-git-send-email-gilf@ezchip.com Signed-off-by: Gil Fruchter Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index abcbf7ff874364..5d219384b4d187 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3035,7 +3035,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (!iter) return ERR_PTR(-ENOMEM); - iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), + iter->buffer_iter = kcalloc(num_possible_cpus(), sizeof(*iter->buffer_iter), GFP_KERNEL); if (!iter->buffer_iter) goto release; From 72917235fd5f08638be1d52dcdb0fee3ce2cc95f Mon Sep 17 00:00:00 2001 From: Gil Fruchter Date: Tue, 9 Jun 2015 10:32:35 +0300 Subject: [PATCH 036/734] tracing: Fix for non-continuous cpu ids Currently an exception occurs due to access beyond the buffer_iter range when using a cpu index bigger than num_possible_cpus(). Below is an example of such an exception when using cpus 0,1,16,17. In order to fix the buffer allocation size for non-continuous cpu ids we allocate according to the max cpu id and not according to the number of possible cpus. Example: $ cat /sys/kernel/debug/tracing/per_cpu/cpu1/trace Path: /bin/busybox CPU: 0 PID: 82 Comm: cat Not tainted 4.0.0 #29 task: 80734c80 ti: 80012000 task.ti: 80012000 [ECR ]: 0x00220100 => Invalid Read @ 0x00000000 by insn @ 0x800abafc [EFA ]: 0x00000000 [BLINK ]: ring_buffer_read_finish+0x24/0x64 [ERET ]: rb_check_pages+0x20/0x188 [STAT32]: 0x00001a00 : BTA: 0x800abafc SP: 0x80013f0c FP: 0x57719cf8 LPS: 0x200036b4 LPE: 0x200036b8 LPC: 0x00000000 r00: 0x8002aca0 r01: 0x00001606 r02: 0x00000000 r03: 0x00000001 r04: 0x00000000 r05: 0x804b4954 r06: 0x00030003 r07: 0x8002a260 r08: 0x00000286 r09: 0x00080002 r10: 0x00001006 r11: 0x807351a4 r12: 0x00000001 Stack Trace: rb_check_pages+0x20/0x188 ring_buffer_read_finish+0x24/0x64 tracing_release+0x4e/0x170 __fput+0x62/0x158 task_work_run+0xa2/0xd4 do_notify_resume+0x52/0x7c resume_user_mode_begin+0xdc/0xe0 Link: http://lkml.kernel.org/r/1433835155-6894-3-git-send-email-gilf@ezchip.com Signed-off-by: Noam Camus Signed-off-by: Gil Fruchter Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5d219384b4d187..59814adc39d6b4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3035,7 +3035,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (!iter) return ERR_PTR(-ENOMEM); - iter->buffer_iter = kcalloc(num_possible_cpus(), sizeof(*iter->buffer_iter), + iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter), GFP_KERNEL); if (!iter->buffer_iter) goto release; From 5e2d5ef8ec1e3854daec41a3697a8d2ce05ff2ef Mon Sep 17 00:00:00 2001 From: Umesh Tiwari Date: Mon, 22 Jun 2015 16:55:06 +0530 Subject: [PATCH 037/734] ftrace: correct the counter increment for trace_buffer data In ftrace_dump, iter.tr->trace_buffer.data is used to disable the buffer, but iter.trace_buffer->data is used to enable it. Even though both point to the same buffer, the same convention should be used for readability.
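Both spellings name the same per-cpu data, since iter.trace_buffer is initialised to &iter.tr->trace_buffer; the patch simply makes the disable side match the enable side (illustrative):

	atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);	/* disable */
	/* ... dump ... */
	atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);	/* re-enable */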
Link: http://lkml.kernel.org/r/1434972306-20043-1-git-send-email-umesh.t@samsung.com Signed-off-by: Umesh Tiwari Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 59814adc39d6b4..6e79408674aaa1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6990,7 +6990,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; From fcc742eaad7cbcbbb2a96edc8f1d22adbaa804cb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 28 May 2015 17:13:14 -0400 Subject: [PATCH 038/734] ring-buffer: Add event descriptor to simplify passing data Add rb_event_info descriptor to pass event info to functions a bit easier than using a bunch of parameters. This will also allow for changing the code around a bit to find better fast paths. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 91 ++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 39 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6260717c18e3c6..ba8f25ffcf6f12 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -399,6 +399,17 @@ struct rb_irq_work { bool wakeup_full; }; +/* + * Structure to hold event state and handle nested events. + */ +struct rb_event_info { + u64 ts; + u64 delta; + unsigned long length; + struct buffer_page *tail_page; + int add_timestamp; +}; + /* * Used for which event context the event is in. * NMI = 0 @@ -2000,9 +2011,12 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) */ static void rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event, unsigned length, - int add_timestamp, u64 delta) + struct ring_buffer_event *event, + struct rb_event_info *info) { + unsigned length = info->length; + u64 delta = info->delta; + /* Only a commit updates the timestamp */ if (unlikely(!rb_event_is_commit(cpu_buffer, event))) delta = 0; @@ -2011,7 +2025,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, * If we need to add a timestamp, then we * add it to the start of the resevered space. 
*/ - if (unlikely(add_timestamp)) { + if (unlikely(info->add_timestamp)) { event = rb_add_time_stamp(event, delta); length -= RB_LEN_TIME_EXTEND; delta = 0; @@ -2203,10 +2217,11 @@ static unsigned rb_calculate_event_length(unsigned length) static inline void rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page *tail_page, - unsigned long tail, unsigned long length) + unsigned long tail, struct rb_event_info *info) { + struct buffer_page *tail_page = info->tail_page; struct ring_buffer_event *event; + unsigned long length = info->length; /* * Only the event that crossed the page boundary @@ -2276,13 +2291,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, */ static noinline struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, - unsigned long length, unsigned long tail, - struct buffer_page *tail_page, u64 ts) + unsigned long tail, struct rb_event_info *info) { + struct buffer_page *tail_page = info->tail_page; struct buffer_page *commit_page = cpu_buffer->commit_page; struct ring_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; + u64 ts; next_page = tail_page; @@ -2368,25 +2384,24 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, out_again: - rb_reset_tail(cpu_buffer, tail_page, tail, length); + rb_reset_tail(cpu_buffer, tail, info); /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); out_reset: /* reset write */ - rb_reset_tail(cpu_buffer, tail_page, tail, length); + rb_reset_tail(cpu_buffer, tail, info); return NULL; } static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, - unsigned long length, u64 ts, - u64 delta, int add_timestamp) + struct rb_event_info *info) { - struct buffer_page *tail_page; struct ring_buffer_event *event; + struct buffer_page *tail_page; unsigned long tail, write; /* @@ -2394,33 +2409,32 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * hold in the time field of the event, then we append a * TIME EXTEND event ahead of the data event. */ - if (unlikely(add_timestamp)) - length += RB_LEN_TIME_EXTEND; + if (unlikely(info->add_timestamp)) + info->length += RB_LEN_TIME_EXTEND; - tail_page = cpu_buffer->tail_page; - write = local_add_return(length, &tail_page->write); + tail_page = info->tail_page = cpu_buffer->tail_page; + write = local_add_return(info->length, &tail_page->write); /* set write to only the index of the write */ write &= RB_WRITE_MASK; - tail = write - length; + tail = write - info->length; /* * If this is the first commit on the page, then it has the same * timestamp as the page itself. */ if (!tail) - delta = 0; + info->delta = 0; /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) - return rb_move_tail(cpu_buffer, length, tail, - tail_page, ts); + return rb_move_tail(cpu_buffer, tail, info); /* We reserved something on the buffer */ event = __rb_page_index(tail_page, tail); kmemcheck_annotate_bitfield(event, bitfield); - rb_update_event(cpu_buffer, event, length, add_timestamp, delta); + rb_update_event(cpu_buffer, event, info); local_inc(&tail_page->entries); @@ -2429,10 +2443,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * its timestamp. 
*/ if (!tail) - tail_page->page->time_stamp = ts; + tail_page->page->time_stamp = info->ts; /* account for these added bytes */ - local_add(length, &cpu_buffer->entries_bytes); + local_add(info->length, &cpu_buffer->entries_bytes); return event; } @@ -2521,9 +2535,8 @@ rb_reserve_next_event(struct ring_buffer *buffer, unsigned long length) { struct ring_buffer_event *event; - u64 ts, delta; + struct rb_event_info info; int nr_loops = 0; - int add_timestamp; u64 diff; rb_start_commit(cpu_buffer); @@ -2543,10 +2556,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, } #endif - length = rb_calculate_event_length(length); + info.length = rb_calculate_event_length(length); again: - add_timestamp = 0; - delta = 0; + info.add_timestamp = 0; + info.delta = 0; /* * We allow for interrupts to reenter here and do a trace. @@ -2560,35 +2573,35 @@ rb_reserve_next_event(struct ring_buffer *buffer, if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) goto out_fail; - ts = rb_time_stamp(cpu_buffer->buffer); - diff = ts - cpu_buffer->write_stamp; + info.ts = rb_time_stamp(cpu_buffer->buffer); + diff = info.ts - cpu_buffer->write_stamp; /* make sure this diff is calculated here */ barrier(); /* Did the write stamp get updated already? */ - if (likely(ts >= cpu_buffer->write_stamp)) { - delta = diff; - if (unlikely(test_time_stamp(delta))) { + if (likely(info.ts >= cpu_buffer->write_stamp)) { + info.delta = diff; + if (unlikely(test_time_stamp(info.delta))) { int local_clock_stable = 1; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK local_clock_stable = sched_clock_stable(); #endif - WARN_ONCE(delta > (1ULL << 59), + WARN_ONCE(info.delta > (1ULL << 59), KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", - (unsigned long long)delta, - (unsigned long long)ts, + (unsigned long long)info.delta, + (unsigned long long)info.ts, (unsigned long long)cpu_buffer->write_stamp, local_clock_stable ? "" : "If you just came from a suspend/resume,\n" "please switch to the trace global clock:\n" " echo global > /sys/kernel/debug/tracing/trace_clock\n"); - add_timestamp = 1; + info.add_timestamp = 1; } } - event = __rb_reserve_next(cpu_buffer, length, ts, - delta, add_timestamp); + event = __rb_reserve_next(cpu_buffer, &info); + if (unlikely(PTR_ERR(event) == -EAGAIN)) goto again; From 9826b2733a4399149072058a11f611357479229d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 28 May 2015 17:36:45 -0400 Subject: [PATCH 039/734] ring-buffer: Move the adding of the extended timestamp out of line Requiring an extended time stamp is an uncommon occurrence, and it is best to do it out of line when needed. Add a noinline function that handles the extended timestamp and have it called behind an unlikely() check to move it completely out of the fast path. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 41 +++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ba8f25ffcf6f12..a78d4ee4bc584c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2396,6 +2396,29 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, return NULL; } +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline bool sched_clock_stable(void) +{ + return true; +} +#endif + +static noinline void +rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, + struct rb_event_info *info) +{ + WARN_ONCE(info->delta > (1ULL << 59), + KERN_WARNING "Delta way too big!
%llu ts=%llu write stamp = %llu\n%s", + (unsigned long long)info->delta, + (unsigned long long)info->ts, + (unsigned long long)cpu_buffer->write_stamp, + sched_clock_stable() ? "" : + "If you just came from a suspend/resume,\n" + "please switch to the trace global clock:\n" + " echo global > /sys/kernel/debug/tracing/trace_clock\n"); + info->add_timestamp = 1; +} + static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info) @@ -2582,22 +2605,8 @@ rb_reserve_next_event(struct ring_buffer *buffer, /* Did the write stamp get updated already? */ if (likely(info.ts >= cpu_buffer->write_stamp)) { info.delta = diff; - if (unlikely(test_time_stamp(info.delta))) { - int local_clock_stable = 1; -#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - local_clock_stable = sched_clock_stable(); -#endif - WARN_ONCE(info.delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", - (unsigned long long)info.delta, - (unsigned long long)info.ts, - (unsigned long long)cpu_buffer->write_stamp, - local_clock_stable ? "" : - "If you just came from a suspend/resume,\n" - "please switch to the trace global clock:\n" - " echo global > /sys/kernel/debug/tracing/trace_clock\n"); - info.add_timestamp = 1; - } + if (unlikely(test_time_stamp(info.delta))) + rb_handle_timestamp(cpu_buffer, &info); } event = __rb_reserve_next(cpu_buffer, &info); From a4543a2fa9ef31d6d0f854a4e14f8f82e7996d8d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 29 May 2015 09:40:18 -0400 Subject: [PATCH 040/734] ring-buffer: Get timestamp after event is allocated Move the capturing of the timestamp to after an event is allocated. If the event is not a commit (where it is an event that preempted another event), then no timestamp is needed, because the delta of nested events is always zero. If the event starts on a new page, no delta needs to be calculated as the full timestamp will be added to the page header, and the event will have a delta of zero. Now if the event requires a time extend (the delta does not fit in the 27 bit delta slot in the header), then the event is discarded, the length is extended to hold the TIME_EXTEND event that allows for a 59 bit delta, and the commit is tried again. If the event can't be discarded (another event came in after it), then the TIME_EXTEND is added directly to the allocated event and the rest of the event is given padding. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 161 ++++++++++++++++++++++++++----------- 1 file changed, 114 insertions(+), 47 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a78d4ee4bc584c..b5ed553e0a45f9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2009,7 +2009,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) * and with this, we can determine what to place into the * data field. */ -static void +static void __always_inline rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event, struct rb_event_info *info) @@ -2017,10 +2017,6 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, unsigned length = info->length; u64 delta = info->delta; - /* Only a commit updates the timestamp */ - if (unlikely(!rb_event_is_commit(cpu_buffer, event))) - delta = 0; - /* * If we need to add a timestamp, then we * add it to the start of the resevered space. 
@@ -2286,6 +2282,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, local_sub(length, &tail_page->write); } +static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer); + /* * This is the slow path, force gcc not to inline it. */ @@ -2300,6 +2298,16 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, int ret; u64 ts; + /* + * If the event had a timestamp attached to it, remove it. + * The first event on a page (nested or not) always uses + * the full timestamp of the new page. + */ + if (info->add_timestamp) { + info->add_timestamp = 0; + info->length -= RB_LEN_TIME_EXTEND; + } + next_page = tail_page; rb_inc_page(cpu_buffer, &next_page); @@ -2386,6 +2394,11 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, rb_reset_tail(cpu_buffer, tail, info); + /* Commit what we have for now to update timestamps */ + rb_end_commit(cpu_buffer); + /* rb_end_commit() decs committing */ + local_inc(&cpu_buffer->committing); + /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); @@ -2403,10 +2416,23 @@ static inline bool sched_clock_stable(void) } #endif +static inline int +rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event); +static inline void rb_event_discard(struct ring_buffer_event *event); +static void +rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event); + static noinline void rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event, struct rb_event_info *info) { + struct ring_buffer_event *padding; + int length; + int size; + WARN_ONCE(info->delta > (1ULL << 59), KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", (unsigned long long)info->delta, @@ -2416,7 +2442,61 @@ rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, "If you just came from a suspend/resume,\n" "please switch to the trace global clock:\n" " echo global > /sys/kernel/debug/tracing/trace_clock\n"); - info->add_timestamp = 1; + + /* + * Discarding this event to add a timestamp in front, but + * we still need to update the length of it to perform the discard. + */ + rb_update_event(cpu_buffer, event, info); + + if (rb_try_to_discard(cpu_buffer, event)) { + info->add_timestamp = 1; + /* + * The time delta since the last event is too big to + * hold in the time field of the event, then we append a + * TIME EXTEND event ahead of the data event. + */ + info->length += RB_LEN_TIME_EXTEND; + return; + } + + /* + * Humpf! An event came in after this one, and because it is not a + * commit, it will have a delta of zero, thus, it will take on + * the timestamp of the previous commit, which happened a long time + * ago (we need to add a timestamp, remember?). + * We need to add the timestamp here. A timestamp is a fixed size + * of 8 bytes. That means the rest of the event needs to be + * padding. + */ + size = info->length - RB_LEN_TIME_EXTEND; + + /* The padding will have a delta of 1 */ + if (size) + info->delta--; + + padding = rb_add_time_stamp(event, info->delta); + + if (size) { + length = info->length; + info->delta = 0; + info->length = size; + rb_update_event(cpu_buffer, padding, info); + + rb_event_discard(padding); + + /* Still visible, need to update write_stamp */ + rb_update_write_stamp(cpu_buffer, event); + + /* Still need to commit the padding. 
*/ + rb_end_commit(cpu_buffer); + + /* rb_end_commit() decs committing */ + local_inc(&cpu_buffer->committing); + + /* The next iteration still uses the original length */ + info->length = length; + } } static struct ring_buffer_event * @@ -2426,14 +2506,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event; struct buffer_page *tail_page; unsigned long tail, write; - - /* - * If the time delta since the last event is too big to - * hold in the time field of the event, then we append a - * TIME EXTEND event ahead of the data event. - */ - if (unlikely(info->add_timestamp)) - info->length += RB_LEN_TIME_EXTEND; + bool is_commit; tail_page = info->tail_page = cpu_buffer->tail_page; write = local_add_return(info->length, &tail_page->write); @@ -2442,31 +2515,42 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, write &= RB_WRITE_MASK; tail = write - info->length; - /* - * If this is the first commit on the page, then it has the same - * timestamp as the page itself. - */ - if (!tail) - info->delta = 0; - /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) return rb_move_tail(cpu_buffer, tail, info); /* We reserved something on the buffer */ - event = __rb_page_index(tail_page, tail); - kmemcheck_annotate_bitfield(event, bitfield); - rb_update_event(cpu_buffer, event, info); - - local_inc(&tail_page->entries); /* - * If this is the first commit on the page, then update - * its timestamp. + * If this is the first commit on the page, then it has the same + * timestamp as the page itself, otherwise we need to figure out + * the delta. */ - if (!tail) + info->ts = rb_time_stamp(cpu_buffer->buffer); + is_commit = rb_event_is_commit(cpu_buffer, event); + + /* Commits are special (non nested events) */ + info->delta = is_commit ? info->ts - cpu_buffer->write_stamp : 0; + + if (!tail) { + /* + * If this is the first commit on the page, set the + * page to its timestamp. + */ tail_page->page->time_stamp = info->ts; + info->delta = 0; + + } else if (unlikely(test_time_stamp(info->delta)) && + !info->add_timestamp) { + rb_handle_timestamp(cpu_buffer, event, info); + return ERR_PTR(-EAGAIN); + } + + kmemcheck_annotate_bitfield(event, bitfield); + rb_update_event(cpu_buffer, event, info); + + local_inc(&tail_page->entries); /* account for these added bytes */ local_add(info->length, &cpu_buffer->entries_bytes); @@ -2560,7 +2644,6 @@ rb_reserve_next_event(struct ring_buffer *buffer, struct ring_buffer_event *event; struct rb_event_info info; int nr_loops = 0; - u64 diff; rb_start_commit(cpu_buffer); @@ -2578,12 +2661,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, return NULL; } #endif - info.length = rb_calculate_event_length(length); - again: info.add_timestamp = 0; - info.delta = 0; - + again: /* * We allow for interrupts to reenter here and do a trace. * If one does, it will cause this original code to loop @@ -2596,19 +2676,6 @@ rb_reserve_next_event(struct ring_buffer *buffer, if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) goto out_fail; - info.ts = rb_time_stamp(cpu_buffer->buffer); - diff = info.ts - cpu_buffer->write_stamp; - - /* make sure this diff is calculated here */ - barrier(); - - /* Did the write stamp get updated already? 
*/ - if (likely(info.ts >= cpu_buffer->write_stamp)) { - info.delta = diff; - if (unlikely(test_time_stamp(info.delta))) - rb_handle_timestamp(cpu_buffer, &info); - } - event = __rb_reserve_next(cpu_buffer, &info); if (unlikely(PTR_ERR(event) == -EAGAIN)) From 7d75e6833b579adb3de2c7b917de1204eeafea47 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 29 May 2015 10:29:10 -0400 Subject: [PATCH 041/734] ring-buffer: Make sure event has enough room for extend and padding Now that an event only adds a time extend after it is committed, if another event comes in before it can discard the allocated event, the time extend needs to be stored within the event. If the event is bigger than the size needed for the time extend, padding must be added. The minimum padding size is 8 bytes. Thus if the event is 12 bytes (size of time extend + 4), there will not be enough room to add both the time extend and padding. Make sure all events are either 8 bytes or 16 or more bytes. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b5ed553e0a45f9..781ce359976c97 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2208,6 +2208,21 @@ static unsigned rb_calculate_event_length(unsigned length) length += RB_EVNT_HDR_SIZE; length = ALIGN(length, RB_ARCH_ALIGNMENT); + /* + * In case the time delta is larger than the 27 bits for it + * in the header, we need to add a timestamp. If another + * event comes in when trying to discard this one to increase + * the length, then the timestamp will be added in the allocated + * space of this event. If length is bigger than the size needed + * for the TIME_EXTEND, then padding has to be used. The events + * length must be either RB_LEN_TIME_EXTEND, or greater than or equal + * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding. + * As length is a multiple of 4, we only need to worry if it + * is 12 (RB_LEN_TIME_EXTEND + 4). + */ + if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT) + length += RB_ALIGNMENT; + return length; } From d90fd77402d3de56a9ca3df04e5d868d0979dc59 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 29 May 2015 12:12:27 -0400 Subject: [PATCH 042/734] ring-buffer: Reorganize function locations Functions in ring-buffer.c have gotten interleaved between different use cases. Move the functions around to get like functions closer together. This may or may not help gcc keep cache locality, but it makes it a little easier to work with the code.
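To make the length rule from the previous patch concrete, a worked sketch (assuming RB_LEN_TIME_EXTEND is 8 and RB_ALIGNMENT is 4, as this tree defines them):

	/*
	 * Lengths reaching this point are 4-byte aligned. An 8-byte
	 * event can be wholly replaced by an 8-byte TIME_EXTEND, and a
	 * 16-byte (or larger) event can hold the 8-byte TIME_EXTEND
	 * plus at least the 8-byte minimum padding. Only the 12-byte
	 * case fits neither, so it is rounded up to 16.
	 */
	if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)	/* 12 bytes */
		length += RB_ALIGNMENT;				/* now 16 */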
Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 920 ++++++++++++++++++------------------- 1 file changed, 456 insertions(+), 464 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 781ce359976c97..1cce0fbf92cea0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1887,73 +1887,6 @@ rb_event_index(struct ring_buffer_event *event) return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; } -static inline int -rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) -{ - unsigned long addr = (unsigned long)event; - unsigned long index; - - index = rb_event_index(event); - addr &= PAGE_MASK; - - return cpu_buffer->commit_page->page == (void *)addr && - rb_commit_index(cpu_buffer) == index; -} - -static void -rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) -{ - unsigned long max_count; - - /* - * We only race with interrupts and NMIs on this CPU. - * If we own the commit event, then we can commit - * all others that interrupted us, since the interruptions - * are in stack format (they finish before they come - * back to us). This allows us to do a simple loop to - * assign the commit to the tail. - */ - again: - max_count = cpu_buffer->nr_pages * 100; - - while (cpu_buffer->commit_page != cpu_buffer->tail_page) { - if (RB_WARN_ON(cpu_buffer, !(--max_count))) - return; - if (RB_WARN_ON(cpu_buffer, - rb_is_reader_page(cpu_buffer->tail_page))) - return; - local_set(&cpu_buffer->commit_page->page->commit, - rb_page_write(cpu_buffer->commit_page)); - rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = - cpu_buffer->commit_page->page->time_stamp; - /* add barrier to keep gcc from optimizing too much */ - barrier(); - } - while (rb_commit_index(cpu_buffer) != - rb_page_write(cpu_buffer->commit_page)) { - - local_set(&cpu_buffer->commit_page->page->commit, - rb_page_write(cpu_buffer->commit_page)); - RB_WARN_ON(cpu_buffer, - local_read(&cpu_buffer->commit_page->page->commit) & - ~RB_WRITE_MASK); - barrier(); - } - - /* again, keep gcc from optimizing */ - barrier(); - - /* - * If an interrupt came in just after the first while loop - * and pushed the tail page forward, we will be left with - * a dangling commit that will never go forward. - */ - if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page)) - goto again; -} - static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; @@ -1979,63 +1912,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) iter->head = 0; } -/* Slow path, do not inline */ -static noinline struct ring_buffer_event * -rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) -{ - event->type_len = RINGBUF_TYPE_TIME_EXTEND; - - /* Not the first event on the page? */ - if (rb_event_index(event)) { - event->time_delta = delta & TS_MASK; - event->array[0] = delta >> TS_SHIFT; - } else { - /* nope, just zero it */ - event->time_delta = 0; - event->array[0] = 0; - } - - return skip_time_extend(event); -} - -/** - * rb_update_event - update event type and data - * @event: the event to update - * @type: the type of event - * @length: the size of the event field in the ring buffer - * - * Update the type and data fields of the event. The length - * is the actual size that is written to the ring buffer, - * and with this, we can determine what to place into the - * data field. 
- */ -static void __always_inline -rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event, - struct rb_event_info *info) -{ - unsigned length = info->length; - u64 delta = info->delta; - - /* - * If we need to add a timestamp, then we - * add it to the start of the resevered space. - */ - if (unlikely(info->add_timestamp)) { - event = rb_add_time_stamp(event, delta); - length -= RB_LEN_TIME_EXTEND; - delta = 0; - } - - event->time_delta = delta; - length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { - event->type_len = 0; - event->array[0] = length; - } else - event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); -} - /* * rb_handle_head_page - writer hit the head page * @@ -2194,38 +2070,6 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, return 0; } -static unsigned rb_calculate_event_length(unsigned length) -{ - struct ring_buffer_event event; /* Used only for sizeof array */ - - /* zero length can cause confusions */ - if (!length) - length++; - - if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) - length += sizeof(event.array[0]); - - length += RB_EVNT_HDR_SIZE; - length = ALIGN(length, RB_ARCH_ALIGNMENT); - - /* - * In case the time delta is larger than the 27 bits for it - * in the header, we need to add a timestamp. If another - * event comes in when trying to discard this one to increase - * the length, then the timestamp will be added in the allocated - * space of this event. If length is bigger than the size needed - * for the TIME_EXTEND, then padding has to be used. The events - * length must be either RB_LEN_TIME_EXTEND, or greater than or equal - * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding. - * As length is a multiple of 4, we only need to worry if it - * is 12 (RB_LEN_TIME_EXTEND + 4). - */ - if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT) - length += RB_ALIGNMENT; - - return length; -} - static inline void rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long tail, struct rb_event_info *info) @@ -2424,71 +2268,471 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, return NULL; } -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline bool sched_clock_stable(void) +/* Slow path, do not inline */ +static noinline struct ring_buffer_event * +rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) { - return true; -} -#endif + event->type_len = RINGBUF_TYPE_TIME_EXTEND; -static inline int -rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event); -static inline void rb_event_discard(struct ring_buffer_event *event); -static void -rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event); + /* Not the first event on the page? */ + if (rb_event_index(event)) { + event->time_delta = delta & TS_MASK; + event->array[0] = delta >> TS_SHIFT; + } else { + /* nope, just zero it */ + event->time_delta = 0; + event->array[0] = 0; + } -static noinline void -rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event, - struct rb_event_info *info) -{ - struct ring_buffer_event *padding; - int length; - int size; + return skip_time_extend(event); +} - WARN_ONCE(info->delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", - (unsigned long long)info->delta, - (unsigned long long)info->ts, - (unsigned long long)cpu_buffer->write_stamp, - sched_clock_stable() ? 
"" : - "If you just came from a suspend/resume,\n" - "please switch to the trace global clock:\n" - " echo global > /sys/kernel/debug/tracing/trace_clock\n"); +/** + * rb_update_event - update event type and data + * @event: the event to update + * @type: the type of event + * @length: the size of the event field in the ring buffer + * + * Update the type and data fields of the event. The length + * is the actual size that is written to the ring buffer, + * and with this, we can determine what to place into the + * data field. + */ +static void __always_inline +rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event, + struct rb_event_info *info) +{ + unsigned length = info->length; + u64 delta = info->delta; /* - * Discarding this event to add a timestamp in front, but - * we still need to update the length of it to perform the discard. + * If we need to add a timestamp, then we + * add it to the start of the resevered space. */ - rb_update_event(cpu_buffer, event, info); - - if (rb_try_to_discard(cpu_buffer, event)) { - info->add_timestamp = 1; - /* - * The time delta since the last event is too big to - * hold in the time field of the event, then we append a - * TIME EXTEND event ahead of the data event. - */ - info->length += RB_LEN_TIME_EXTEND; - return; + if (unlikely(info->add_timestamp)) { + event = rb_add_time_stamp(event, delta); + length -= RB_LEN_TIME_EXTEND; + delta = 0; } - /* - * Humpf! An event came in after this one, and because it is not a - * commit, it will have a delta of zero, thus, it will take on - * the timestamp of the previous commit, which happened a long time - * ago (we need to add a timestamp, remember?). - * We need to add the timestamp here. A timestamp is a fixed size - * of 8 bytes. That means the rest of the event needs to be - * padding. - */ - size = info->length - RB_LEN_TIME_EXTEND; - - /* The padding will have a delta of 1 */ - if (size) - info->delta--; + event->time_delta = delta; + length -= RB_EVNT_HDR_SIZE; + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { + event->type_len = 0; + event->array[0] = length; + } else + event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); +} + +static unsigned rb_calculate_event_length(unsigned length) +{ + struct ring_buffer_event event; /* Used only for sizeof array */ + + /* zero length can cause confusions */ + if (!length) + length++; + + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) + length += sizeof(event.array[0]); + + length += RB_EVNT_HDR_SIZE; + length = ALIGN(length, RB_ARCH_ALIGNMENT); + + /* + * In case the time delta is larger than the 27 bits for it + * in the header, we need to add a timestamp. If another + * event comes in when trying to discard this one to increase + * the length, then the timestamp will be added in the allocated + * space of this event. If length is bigger than the size needed + * for the TIME_EXTEND, then padding has to be used. The events + * length must be either RB_LEN_TIME_EXTEND, or greater than or equal + * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding. + * As length is a multiple of 4, we only need to worry if it + * is 12 (RB_LEN_TIME_EXTEND + 4). 
+ */ + if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT) + length += RB_ALIGNMENT; + + return length; +} + +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline bool sched_clock_stable(void) +{ + return true; +} +#endif + +static inline int +rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) +{ + unsigned long new_index, old_index; + struct buffer_page *bpage; + unsigned long index; + unsigned long addr; + + new_index = rb_event_index(event); + old_index = new_index + rb_event_ts_length(event); + addr = (unsigned long)event; + addr &= PAGE_MASK; + + bpage = cpu_buffer->tail_page; + + if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { + unsigned long write_mask = + local_read(&bpage->write) & ~RB_WRITE_MASK; + unsigned long event_length = rb_event_length(event); + /* + * This is on the tail page. It is possible that + * a write could come in and move the tail page + * and write to the next page. That is fine + * because we just shorten what is on this page. + */ + old_index += write_mask; + new_index += write_mask; + index = local_cmpxchg(&bpage->write, old_index, new_index); + if (index == old_index) { + /* update counters */ + local_sub(event_length, &cpu_buffer->entries_bytes); + return 1; + } + } + + /* could not discard */ + return 0; +} + +static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) +{ + local_inc(&cpu_buffer->committing); + local_inc(&cpu_buffer->commits); +} + +static void +rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) +{ + unsigned long max_count; + + /* + * We only race with interrupts and NMIs on this CPU. + * If we own the commit event, then we can commit + * all others that interrupted us, since the interruptions + * are in stack format (they finish before they come + * back to us). This allows us to do a simple loop to + * assign the commit to the tail. + */ + again: + max_count = cpu_buffer->nr_pages * 100; + + while (cpu_buffer->commit_page != cpu_buffer->tail_page) { + if (RB_WARN_ON(cpu_buffer, !(--max_count))) + return; + if (RB_WARN_ON(cpu_buffer, + rb_is_reader_page(cpu_buffer->tail_page))) + return; + local_set(&cpu_buffer->commit_page->page->commit, + rb_page_write(cpu_buffer->commit_page)); + rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; + /* add barrier to keep gcc from optimizing too much */ + barrier(); + } + while (rb_commit_index(cpu_buffer) != + rb_page_write(cpu_buffer->commit_page)) { + + local_set(&cpu_buffer->commit_page->page->commit, + rb_page_write(cpu_buffer->commit_page)); + RB_WARN_ON(cpu_buffer, + local_read(&cpu_buffer->commit_page->page->commit) & + ~RB_WRITE_MASK); + barrier(); + } + + /* again, keep gcc from optimizing */ + barrier(); + + /* + * If an interrupt came in just after the first while loop + * and pushed the tail page forward, we will be left with + * a dangling commit that will never go forward. 
+ */ + if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page)) + goto again; +} + +static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) +{ + unsigned long commits; + + if (RB_WARN_ON(cpu_buffer, + !local_read(&cpu_buffer->committing))) + return; + + again: + commits = local_read(&cpu_buffer->commits); + /* synchronize with interrupts */ + barrier(); + if (local_read(&cpu_buffer->committing) == 1) + rb_set_commit_to_write(cpu_buffer); + + local_dec(&cpu_buffer->committing); + + /* synchronize with interrupts */ + barrier(); + + /* + * Need to account for interrupts coming in between the + * updating of the commit page and the clearing of the + * committing counter. + */ + if (unlikely(local_read(&cpu_buffer->commits) != commits) && + !local_read(&cpu_buffer->committing)) { + local_inc(&cpu_buffer->committing); + goto again; + } +} + +static inline void rb_event_discard(struct ring_buffer_event *event) +{ + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) + event = skip_time_extend(event); + + /* array[0] holds the actual length for the discarded event */ + event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; + event->type_len = RINGBUF_TYPE_PADDING; + /* time delta must be non zero */ + if (!event->time_delta) + event->time_delta = 1; +} + +static inline int +rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) +{ + unsigned long addr = (unsigned long)event; + unsigned long index; + + index = rb_event_index(event); + addr &= PAGE_MASK; + + return cpu_buffer->commit_page->page == (void *)addr && + rb_commit_index(cpu_buffer) == index; +} + +static void +rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) +{ + u64 delta; + + /* + * The event first in the commit queue updates the + * time stamp. + */ + if (rb_event_is_commit(cpu_buffer, event)) { + /* + * A commit event that is first on a page + * updates the write timestamp with the page stamp + */ + if (!rb_event_index(event)) + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; + else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { + delta = event->array[0]; + delta <<= TS_SHIFT; + delta += event->time_delta; + cpu_buffer->write_stamp += delta; + } else + cpu_buffer->write_stamp += event->time_delta; + } +} + +static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) +{ + local_inc(&cpu_buffer->entries); + rb_update_write_stamp(cpu_buffer, event); + rb_end_commit(cpu_buffer); +} + +static __always_inline void +rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) +{ + bool pagebusy; + + if (buffer->irq_work.waiters_pending) { + buffer->irq_work.waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&buffer->irq_work.work); + } + + if (cpu_buffer->irq_work.waiters_pending) { + cpu_buffer->irq_work.waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&cpu_buffer->irq_work.work); + } + + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + + if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) { + cpu_buffer->irq_work.wakeup_full = true; + cpu_buffer->irq_work.full_waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&cpu_buffer->irq_work.work); + } +} + +/* + * The lock and unlock are done within a preempt disable section. 
+ * The current_context per_cpu variable can only be modified + * by the current task between lock and unlock. But it can + * be modified more than once via an interrupt. To pass this + * information from the lock to the unlock without having to + * access the 'in_interrupt()' functions again (which do show + * a bit of overhead in something as critical as function tracing, + * we use a bitmask trick. + * + * bit 0 = NMI context + * bit 1 = IRQ context + * bit 2 = SoftIRQ context + * bit 3 = normal context. + * + * This works because this is the order of contexts that can + * preempt other contexts. A SoftIRQ never preempts an IRQ + * context. + * + * When the context is determined, the corresponding bit is + * checked and set (if it was set, then a recursion of that context + * happened). + * + * On unlock, we need to clear this bit. To do so, just subtract + * 1 from the current_context and AND it to itself. + * + * (binary) + * 101 - 1 = 100 + * 101 & 100 = 100 (clearing bit zero) + * + * 1010 - 1 = 1001 + * 1010 & 1001 = 1000 (clearing bit 1) + * + * The least significant bit can be cleared this way, and it + * just so happens that it is the same bit corresponding to + * the current context. + */ + +static __always_inline int +trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) +{ + unsigned int val = cpu_buffer->current_context; + int bit; + + if (in_interrupt()) { + if (in_nmi()) + bit = RB_CTX_NMI; + else if (in_irq()) + bit = RB_CTX_IRQ; + else + bit = RB_CTX_SOFTIRQ; + } else + bit = RB_CTX_NORMAL; + + if (unlikely(val & (1 << bit))) + return 1; + + val |= (1 << bit); + cpu_buffer->current_context = val; + + return 0; +} + +static __always_inline void +trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) +{ + cpu_buffer->current_context &= cpu_buffer->current_context - 1; +} + +/** + * ring_buffer_unlock_commit - commit a reserved + * @buffer: The buffer to commit to + * @event: The event pointer to commit. + * + * This commits the data to the ring buffer, and releases any locks held. + * + * Must be paired with ring_buffer_lock_reserve. + */ +int ring_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event) +{ + struct ring_buffer_per_cpu *cpu_buffer; + int cpu = raw_smp_processor_id(); + + cpu_buffer = buffer->buffers[cpu]; + + rb_commit(cpu_buffer, event); + + rb_wakeups(buffer, cpu_buffer); + + trace_recursive_unlock(cpu_buffer); + + preempt_enable_notrace(); + + return 0; +} +EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); + +static noinline void +rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event, + struct rb_event_info *info) +{ + struct ring_buffer_event *padding; + int length; + int size; + + WARN_ONCE(info->delta > (1ULL << 59), + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", + (unsigned long long)info->delta, + (unsigned long long)info->ts, + (unsigned long long)cpu_buffer->write_stamp, + sched_clock_stable() ? "" : + "If you just came from a suspend/resume,\n" + "please switch to the trace global clock:\n" + " echo global > /sys/kernel/debug/tracing/trace_clock\n"); + + /* + * Discarding this event to add a timestamp in front, but + * we still need to update the length of it to perform the discard. 
+ */ + rb_update_event(cpu_buffer, event, info); + + if (rb_try_to_discard(cpu_buffer, event)) { + info->add_timestamp = 1; + /* + * The time delta since the last event is too big to + * hold in the time field of the event, then we append a + * TIME EXTEND event ahead of the data event. + */ + info->length += RB_LEN_TIME_EXTEND; + return; + } + + /* + * Humpf! An event came in after this one, and because it is not a + * commit, it will have a delta of zero, thus, it will take on + * the timestamp of the previous commit, which happened a long time + * ago (we need to add a timestamp, remember?). + * We need to add the timestamp here. A timestamp is a fixed size + * of 8 bytes. That means the rest of the event needs to be + * padding. + */ + size = info->length - RB_LEN_TIME_EXTEND; + + /* The padding will have a delta of 1 */ + if (size) + info->delta--; padding = rb_add_time_stamp(event, info->delta); @@ -2573,84 +2817,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; } -static inline int -rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) -{ - unsigned long new_index, old_index; - struct buffer_page *bpage; - unsigned long index; - unsigned long addr; - - new_index = rb_event_index(event); - old_index = new_index + rb_event_ts_length(event); - addr = (unsigned long)event; - addr &= PAGE_MASK; - - bpage = cpu_buffer->tail_page; - - if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { - unsigned long write_mask = - local_read(&bpage->write) & ~RB_WRITE_MASK; - unsigned long event_length = rb_event_length(event); - /* - * This is on the tail page. It is possible that - * a write could come in and move the tail page - * and write to the next page. That is fine - * because we just shorten what is on this page. - */ - old_index += write_mask; - new_index += write_mask; - index = local_cmpxchg(&bpage->write, old_index, new_index); - if (index == old_index) { - /* update counters */ - local_sub(event_length, &cpu_buffer->entries_bytes); - return 1; - } - } - - /* could not discard */ - return 0; -} - -static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) -{ - local_inc(&cpu_buffer->committing); - local_inc(&cpu_buffer->commits); -} - -static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) -{ - unsigned long commits; - - if (RB_WARN_ON(cpu_buffer, - !local_read(&cpu_buffer->committing))) - return; - - again: - commits = local_read(&cpu_buffer->commits); - /* synchronize with interrupts */ - barrier(); - if (local_read(&cpu_buffer->committing) == 1) - rb_set_commit_to_write(cpu_buffer); - - local_dec(&cpu_buffer->committing); - - /* synchronize with interrupts */ - barrier(); - - /* - * Need to account for interrupts coming in between the - * updating of the commit page and the clearing of the - * committing counter. - */ - if (unlikely(local_read(&cpu_buffer->commits) != commits) && - !local_read(&cpu_buffer->committing)) { - local_inc(&cpu_buffer->committing); - goto again; - } -} - static struct ring_buffer_event * rb_reserve_next_event(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer, @@ -2706,75 +2872,6 @@ rb_reserve_next_event(struct ring_buffer *buffer, return NULL; } -/* - * The lock and unlock are done within a preempt disable section. - * The current_context per_cpu variable can only be modified - * by the current task between lock and unlock. But it can - * be modified more than once via an interrupt. 
To pass this - * information from the lock to the unlock without having to - * access the 'in_interrupt()' functions again (which do show - * a bit of overhead in something as critical as function tracing, - * we use a bitmask trick. - * - * bit 0 = NMI context - * bit 1 = IRQ context - * bit 2 = SoftIRQ context - * bit 3 = normal context. - * - * This works because this is the order of contexts that can - * preempt other contexts. A SoftIRQ never preempts an IRQ - * context. - * - * When the context is determined, the corresponding bit is - * checked and set (if it was set, then a recursion of that context - * happened). - * - * On unlock, we need to clear this bit. To do so, just subtract - * 1 from the current_context and AND it to itself. - * - * (binary) - * 101 - 1 = 100 - * 101 & 100 = 100 (clearing bit zero) - * - * 1010 - 1 = 1001 - * 1010 & 1001 = 1000 (clearing bit 1) - * - * The least significant bit can be cleared this way, and it - * just so happens that it is the same bit corresponding to - * the current context. - */ - -static __always_inline int -trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) -{ - unsigned int val = cpu_buffer->current_context; - int bit; - - if (in_interrupt()) { - if (in_nmi()) - bit = RB_CTX_NMI; - else if (in_irq()) - bit = RB_CTX_IRQ; - else - bit = RB_CTX_SOFTIRQ; - } else - bit = RB_CTX_NORMAL; - - if (unlikely(val & (1 << bit))) - return 1; - - val |= (1 << bit); - cpu_buffer->current_context = val; - - return 0; -} - -static __always_inline void -trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) -{ - cpu_buffer->current_context &= cpu_buffer->current_context - 1; -} - /** * ring_buffer_lock_reserve - reserve a part of the buffer * @buffer: the ring buffer to reserve from @@ -2833,111 +2930,6 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) } EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); -static void -rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) -{ - u64 delta; - - /* - * The event first in the commit queue updates the - * time stamp. 
- */ - if (rb_event_is_commit(cpu_buffer, event)) { - /* - * A commit event that is first on a page - * updates the write timestamp with the page stamp - */ - if (!rb_event_index(event)) - cpu_buffer->write_stamp = - cpu_buffer->commit_page->page->time_stamp; - else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { - delta = event->array[0]; - delta <<= TS_SHIFT; - delta += event->time_delta; - cpu_buffer->write_stamp += delta; - } else - cpu_buffer->write_stamp += event->time_delta; - } -} - -static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) -{ - local_inc(&cpu_buffer->entries); - rb_update_write_stamp(cpu_buffer, event); - rb_end_commit(cpu_buffer); -} - -static __always_inline void -rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) -{ - bool pagebusy; - - if (buffer->irq_work.waiters_pending) { - buffer->irq_work.waiters_pending = false; - /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&buffer->irq_work.work); - } - - if (cpu_buffer->irq_work.waiters_pending) { - cpu_buffer->irq_work.waiters_pending = false; - /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&cpu_buffer->irq_work.work); - } - - pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - - if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) { - cpu_buffer->irq_work.wakeup_full = true; - cpu_buffer->irq_work.full_waiters_pending = false; - /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&cpu_buffer->irq_work.work); - } -} - -/** - * ring_buffer_unlock_commit - commit a reserved - * @buffer: The buffer to commit to - * @event: The event pointer to commit. - * - * This commits the data to the ring buffer, and releases any locks held. - * - * Must be paired with ring_buffer_lock_reserve. - */ -int ring_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event) -{ - struct ring_buffer_per_cpu *cpu_buffer; - int cpu = raw_smp_processor_id(); - - cpu_buffer = buffer->buffers[cpu]; - - rb_commit(cpu_buffer, event); - - rb_wakeups(buffer, cpu_buffer); - - trace_recursive_unlock(cpu_buffer); - - preempt_enable_notrace(); - - return 0; -} -EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); - -static inline void rb_event_discard(struct ring_buffer_event *event) -{ - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) - event = skip_time_extend(event); - - /* array[0] holds the actual length for the discarded event */ - event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; - event->type_len = RINGBUF_TYPE_PADDING; - /* time delta must be non zero */ - if (!event->time_delta) - event->time_delta = 1; -} - /* * Decrement the entries to the page that an event is on. * The event does not even need to exist, only the pointer From 72ac426a5bb0cec572d26b4456f8c1e14601694e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 16 Jul 2015 13:24:54 -0400 Subject: [PATCH 043/734] tracing: Clean up stack tracing and fix fentry updates Akashi Takahiro was porting the stack tracer to arm64 and found some issues with it. One was that it repeats the top function, due to the stack frame added by the mcount caller and added by itself. This was added when fentry came in, and before fentry created its own stack frame. But x86's fentry now creates its own stack frame, and there's no need to insert the function again. 
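A condensed sketch of the replacement logic (simplified from the diff below; names follow kernel/trace/trace_stack.c):

	/* save the real backtrace, then locate the traced function's ip in it */
	save_stack_trace(&max_stack_trace);

	for (i = 0; i < max_stack_trace.nr_entries; i++) {
		if (stack_dump_trace[i] == ip)
			break;
	}
	/* entries before index i are overhead of the stack tracer itself */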
This also cleans up the code a bit, where it doesn't need to do something special for fentry, and doesn't include insertion of a duplicate entry for the called function being traced. Link: http://lkml.kernel.org/r/55A646EE.6030402@linaro.org Some-suggestions-by: Jungseok Lee Some-suggestions-by: Mark Rutland Reported-by: AKASHI Takahiro Signed-off-by: Steven Rostedt --- kernel/trace/trace_stack.c | 68 +++++++++++++------------------------- 1 file changed, 23 insertions(+), 45 deletions(-) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 3f34496244e936..b746399ab59c01 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -18,12 +18,6 @@ #define STACK_TRACE_ENTRIES 500 -#ifdef CC_USING_FENTRY -# define fentry 1 -#else -# define fentry 0 -#endif - static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; @@ -35,7 +29,7 @@ static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; */ static struct stack_trace max_stack_trace = { .max_entries = STACK_TRACE_ENTRIES - 1, - .entries = &stack_dump_trace[1], + .entries = &stack_dump_trace[0], }; static unsigned long max_stack_size; @@ -55,7 +49,7 @@ static inline void print_max_stack(void) pr_emerg(" Depth Size Location (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries - 1); + max_stack_trace.nr_entries); for (i = 0; i < max_stack_trace.nr_entries; i++) { if (stack_dump_trace[i] == ULONG_MAX) @@ -77,7 +71,7 @@ check_stack(unsigned long ip, unsigned long *stack) unsigned long this_size, flags; unsigned long *p, *top, *start; static int tracer_frame; int frame_size = ACCESS_ONCE(tracer_frame); - int i; + int i, x; this_size = ((unsigned long)stack) & (THREAD_SIZE-1); this_size = THREAD_SIZE - this_size; @@ -105,26 +99,20 @@ check_stack(unsigned long ip, unsigned long *stack) max_stack_size = this_size; max_stack_trace.nr_entries = 0; - - if (using_ftrace_ops_list_func()) - max_stack_trace.skip = 4; - else - max_stack_trace.skip = 3; + max_stack_trace.skip = 3; save_stack_trace(&max_stack_trace); - /* - * Add the passed in ip from the function tracer. - * Searching for this on the stack will skip over - * most of the overhead from the stack tracer itself. - */ - stack_dump_trace[0] = ip; - max_stack_trace.nr_entries++; + /* Skip over the overhead of the stack tracer itself */ + for (i = 0; i < max_stack_trace.nr_entries; i++) { + if (stack_dump_trace[i] == ip) + break; + } /* * Now find where in the stack these are. */ - i = 0; + x = 0; start = stack; top = (unsigned long *) (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE); @@ -139,12 +127,15 @@ check_stack(unsigned long ip, unsigned long *stack) while (i < max_stack_trace.nr_entries) { int found = 0; - stack_dump_index[i] = this_size; + stack_dump_index[x] = this_size; p = start; for (; p < top && i < max_stack_trace.nr_entries; p++) { + if (stack_dump_trace[i] == ULONG_MAX) + break; if (*p == stack_dump_trace[i]) { - this_size = stack_dump_index[i++] = + stack_dump_trace[x] = stack_dump_trace[i++]; + this_size = stack_dump_index[x++] = (top - p) * sizeof(unsigned long); found = 1; /* Start the search from here */ @@ -156,7 +147,7 @@ check_stack(unsigned long ip, unsigned long *stack) * out what that is, then figure it out * now. 
*/ - if (unlikely(!tracer_frame) && i == 1) { + if (unlikely(!tracer_frame)) { tracer_frame = (p - stack) * sizeof(unsigned long); max_stack_size -= tracer_frame; @@ -168,6 +159,10 @@ check_stack(unsigned long ip, unsigned long *stack) i++; } + max_stack_trace.nr_entries = x; + for (; x < i; x++) + stack_dump_trace[x] = ULONG_MAX; + if (task_stack_end_corrupted(current)) { print_max_stack(); BUG(); } @@ -192,24 +187,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, if (per_cpu(trace_active, cpu)++ != 0) goto out; - /* - * When fentry is used, the traced function does not get - * its stack frame set up, and we lose the parent. - * The ip is pretty useless because the function tracer - * was called before that function set up its stack frame. - * In this case, we use the parent ip. - * - * By adding the return address of either the parent ip - * or the current ip we can disregard most of the stack usage - * caused by the stack tracer itself. - * - * The function tracer always reports the address of where the - * mcount call was, but the stack will hold the return address. - */ - if (fentry) - ip = parent_ip; - else - ip += MCOUNT_INSN_SIZE; + ip += MCOUNT_INSN_SIZE; check_stack(ip, &stack); @@ -284,7 +262,7 @@ __next(struct seq_file *m, loff_t *pos) { long n = *pos - 1; - if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX) + if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX) return NULL; m->private = (void *)n; @@ -354,7 +332,7 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, " Depth Size Location" " (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries - 1); + max_stack_trace.nr_entries); if (!stack_tracer_enabled && !max_stack_size) print_disabled(m); From 8e436ca042d904533a1e14fdc85f0facdfca752f Mon Sep 17 00:00:00 2001 From: Umesh Tiwari Date: Mon, 22 Jun 2015 16:58:08 +0530 Subject: [PATCH 044/734] ftrace: add tracing_thresh to function profile This patch extends the tracing_thresh functionality to the function profile tracer. If tracing_thresh is set, print only those entries whose average is greater than tracing_thresh. Link: http://lkml.kernel.org/r/1434972488-8571-1-git-send-email-umesh.t@samsung.com Signed-off-by: Umesh Tiwari [ Removed unnecessary 'moved' comment ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02bece4a99ea36..f46dbb5cdf762b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -613,13 +613,18 @@ static int function_stat_show(struct seq_file *m, void *v) goto out; } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + avg = rec->time; + do_div(avg, rec->counter); + if (tracing_thresh && (avg < tracing_thresh)) + goto out; +#endif + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); seq_printf(m, " %-30.30s %10lu", str, rec->counter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_puts(m, " "); - avg = rec->time; - do_div(avg, rec->counter); /* Sample standard deviation (s^2) */ if (rec->counter <= 1) From 82c355e81afbf16bc1ab379899a79eb66e2b7504 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 16 Jul 2015 21:58:52 -0400 Subject: [PATCH 045/734] ftrace: Fix function_graph duration spacing with 7-digits Jungseok Lee noticed the following: Currently, the width of rows with 7-digit duration numbers is not aligned with the other cases, as in the following example.
3) $ 3999884 us | } 3) | finish_task_switch() { 3) 0.365 us | _raw_spin_unlock_irq(); 3) 3.333 us | } 3) $ 3999976 us | } 3) $ 3999979 us | } /* schedule */ By adding a single white space in the 7-digit case, the format can easily be unified, as follows. 3) $ 2237472 us | } 3) | finish_task_switch() { 3) 0.364 us | _raw_spin_unlock_irq(); 3) 3.125 us | } 3) $ 2237556 us | } 3) $ 2237559 us | } /* schedule */ Instead of making a special case for 7-digit numbers, the logic of the len and the space loop is slightly modified to make the two cases have the same format. Link: http://lkml.kernel.org/r/1436626300-1679-2-git-send-email-jungseoklee85@gmail.com Reported-by: Jungseok Lee Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8968bf720c1259..ca98445782acaa 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -715,13 +715,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) snprintf(nsecs_str, slen, "%03lu", nsecs_rem); trace_seq_printf(s, ".%s", nsecs_str); - len += strlen(nsecs_str); + len += strlen(nsecs_str) + 1; } trace_seq_puts(s, " us "); /* Print remaining spaces to fit the row's width */ - for (i = len; i < 7; i++) + for (i = len; i < 8; i++) trace_seq_putc(s, ' '); } From b838e1d96c613019095ba008afbee800977b0582 Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Sat, 11 Jul 2015 14:51:40 +0000 Subject: [PATCH 046/734] tracing: Introduce two additional marks for delay Fine-granularity support for delay would be very useful when profiling VM logic, such as page allocation, including page reclaim and memory compaction, with the function graph tracer. Thus, this patch adds two additional marks with two changes. - The equal sign in the mark selection function is removed to align the code behavior with the comments and documentation. - The function graph example related to delay in ftrace.txt is updated to cover all supported marks. Link: http://lkml.kernel.org/r/1436626300-1679-3-git-send-email-jungseoklee85@gmail.com Cc: Byungchul Park Signed-off-by: Jungseok Lee Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 51 +++++++++++++++++++++++++--------- kernel/trace/trace_output.c | 4 ++- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 7ddb1e319f84d1..072d3c4d575393 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -686,6 +686,8 @@ The above is mostly meaningful for kernel developers. The marks are determined by the difference between this current trace and the next trace. '$' - greater than 1 second + '@' - greater than 100 millisecond + '*' - greater than 10 millisecond '#' - greater than 1000 microsecond '!' - greater than 100 microsecond '+' - greater than 10 microsecond @@ -1939,26 +1941,49 @@ want, depending on your needs. ie: - 0) | up_write() { - 0) 0.646 us | _spin_lock_irqsave(); - 0) 0.684 us | _spin_unlock_irqrestore(); - 0) 3.123 us | } - 0) 0.548 us | fput(); - 0) + 58.628 us | } + 3) # 1837.709 us | } /* __switch_to */ + 3) | finish_task_switch() { + 3) 0.313 us | _raw_spin_unlock_irq(); + 3) 3.177 us | } + 3) # 1889.063 us | } /* __schedule */ + 3) ! 140.417 us | } /* __schedule */ + 3) # 2034.948 us | } /* schedule */ + 3) * 33998.59 us | } /* schedule_preempt_disabled */ [...]
- 0) | putname() { - 0) | kmem_cache_free() { - 0) 0.518 us | __phys_addr(); - 0) 1.757 us | } - 0) 2.861 us | } - 0) ! 115.305 us | } - 0) ! 116.402 us | } + 1) 0.260 us | msecs_to_jiffies(); + 1) 0.313 us | __rcu_read_unlock(); + 1) + 61.770 us | } + 1) + 64.479 us | } + 1) 0.313 us | rcu_bh_qs(); + 1) 0.313 us | __local_bh_enable(); + 1) ! 217.240 us | } + 1) 0.365 us | idle_cpu(); + 1) | rcu_irq_exit() { + 1) 0.417 us | rcu_eqs_enter_common.isra.47(); + 1) 3.125 us | } + 1) ! 227.812 us | } + 1) ! 457.395 us | } + 1) @ 119760.2 us | } + + [...] + + 2) | handle_IPI() { + 1) 6.979 us | } + 2) 0.417 us | scheduler_ipi(); + 1) 9.791 us | } + 1) + 12.917 us | } + 2) 3.490 us | } + 1) + 15.729 us | } + 1) + 18.542 us | } + 2) $ 3594274 us | } + means that the function exceeded 10 usecs. ! means that the function exceeded 100 usecs. # means that the function exceeded 1000 usecs. + * means that the function exceeded 10 msecs. + @ means that the function exceeded 100 msecs. $ means that the function exceeded 1 sec. diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index dfab253727dc9e..8e481a84aeea79 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -496,6 +496,8 @@ static const struct trace_mark { char sym; } mark[] = { MARK(1000000000ULL , '$'), /* 1 sec */ + MARK(100000000ULL , '@'), /* 100 msec */ + MARK(10000000ULL , '*'), /* 10 msec */ MARK(1000000ULL , '#'), /* 1000 usecs */ MARK(100000ULL , '!'), /* 100 usecs */ MARK(10000ULL , '+'), /* 10 usecs */ @@ -508,7 +510,7 @@ char trace_find_mark(unsigned long long d) int size = ARRAY_SIZE(mark); for (i = 0; i < size; i++) { - if (d >= mark[i].val) + if (d > mark[i].val) break; } From c93bf928fea22c61f6b5c04786b325c9bfbc0462 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Sun, 12 Jul 2015 17:52:24 +0800 Subject: [PATCH 047/734] ftrace: Format MCOUNT_ADDR address as type unsigned long We always use type unsigned long to format the ip address, since the value of an ip address is never negative. This patch uses type unsigned long, instead of long, to format the ip address. The code reads more clearly with type unsigned long, although it is correct with either unsigned long or long. Link: http://lkml.kernel.org/r/1436694744-16747-1-git-send-email-mhuang@redhat.com Cc: Minfei Huang Cc: "H. Peter Anvin" Cc: James Hogan Cc: Michal Simek Cc: Benjamin Herrenschmidt Cc: "David S.
Miller" Signed-off-by: Minfei Huang Signed-off-by: Steven Rostedt --- arch/metag/include/asm/ftrace.h | 2 +- arch/microblaze/include/asm/ftrace.h | 2 +- arch/powerpc/include/asm/ftrace.h | 2 +- arch/sh/include/asm/ftrace.h | 2 +- arch/sparc/include/asm/ftrace.h | 2 +- arch/x86/include/asm/ftrace.h | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/metag/include/asm/ftrace.h b/arch/metag/include/asm/ftrace.h index 2901f0f7d944ec..a2269d60a945bc 100644 --- a/arch/metag/include/asm/ftrace.h +++ b/arch/metag/include/asm/ftrace.h @@ -6,7 +6,7 @@ #ifndef __ASSEMBLY__ extern void mcount_wrapper(void); -#define MCOUNT_ADDR ((long)(mcount_wrapper)) +#define MCOUNT_ADDR ((unsigned long)(mcount_wrapper)) static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/microblaze/include/asm/ftrace.h b/arch/microblaze/include/asm/ftrace.h index fd2fa2eca62f18..da0144f40d99d5 100644 --- a/arch/microblaze/include/asm/ftrace.h +++ b/arch/microblaze/include/asm/ftrace.h @@ -3,7 +3,7 @@ #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(_mcount)) +#define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */ #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index e3661872fbea5b..ef89b146557310 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -2,7 +2,7 @@ #define _ASM_POWERPC_FTRACE #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(_mcount)) +#define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ #ifdef __ASSEMBLY__ diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index e79fb6ebaa4237..1f157b86eaa7a1 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -9,7 +9,7 @@ #ifndef __ASSEMBLY__ extern void mcount(void); -#define MCOUNT_ADDR ((long)(mcount)) +#define MCOUNT_ADDR ((unsigned long)(mcount)) #ifdef CONFIG_DYNAMIC_FTRACE #define CALL_ADDR ((long)(ftrace_call)) diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index 9ec94ad116fbdd..3192a8e42fd62c 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -2,7 +2,7 @@ #define _ASM_SPARC64_FTRACE #ifdef CONFIG_MCOUNT -#define MCOUNT_ADDR ((long)(_mcount)) +#define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index f45acad3c4b678..24938852db3013 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,9 +3,9 @@ #ifdef CONFIG_FUNCTION_TRACER #ifdef CC_USING_FENTRY -# define MCOUNT_ADDR ((long)(__fentry__)) +# define MCOUNT_ADDR ((unsigned long)(__fentry__)) #else -# define MCOUNT_ADDR ((long)(mcount)) +# define MCOUNT_ADDR ((unsigned long)(mcount)) #endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ From 3bf2789cad9e6573dc19a6c3d123c2c049f2d90f Mon Sep 17 00:00:00 2001 From: Vivek Trivedi Date: Mon, 22 Jun 2015 15:36:06 +0530 Subject: [PATCH 048/734] smack: allow mount opts setting over filesystems with binary mount data Add support for setting smack mount labels(using smackfsdef, smackfsroot, smackfshat, smackfsfloor, smackfstransmute) for filesystems with binary mount data like NFS. To achieve this, implement sb_parse_opts_str and sb_set_mnt_opts security operations in smack LSM similar to SELinux. 
Signed-off-by: Vivek Trivedi Signed-off-by: Amit Sahrawat Acked-by: Casey Schaufler --- security/smack/smack.h | 18 +++ security/smack/smack_lsm.c | 241 +++++++++++++++++++++++++++++++------ 2 files changed, 219 insertions(+), 40 deletions(-) diff --git a/security/smack/smack.h b/security/smack/smack.h index 244e035e5a99f3..69ab9eb7d6d927 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -143,6 +143,24 @@ struct smack_onlycap { struct smack_known *smk_label; }; +/* Super block security struct flags for mount options */ +#define FSDEFAULT_MNT 0x01 +#define FSFLOOR_MNT 0x02 +#define FSHAT_MNT 0x04 +#define FSROOT_MNT 0x08 +#define FSTRANS_MNT 0x10 + +#define NUM_SMK_MNT_OPTS 5 + +enum { + Opt_error = -1, + Opt_fsdefault = 1, + Opt_fsfloor = 2, + Opt_fshat = 3, + Opt_fsroot = 4, + Opt_fstransmute = 5, +}; + /* * Mount options */ diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index a143328f75ebb0..d962f887d3f445 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "smack.h" #define TRANS_TRUE "TRUE" @@ -64,6 +65,15 @@ static char *smk_bu_mess[] = { "Unconfined Object", /* SMACK_UNCONFINED_OBJECT */ }; +static const match_table_t tokens = { + {Opt_fsdefault, SMK_FSDEFAULT "%s"}, + {Opt_fsfloor, SMK_FSFLOOR "%s"}, + {Opt_fshat, SMK_FSHAT "%s"}, + {Opt_fsroot, SMK_FSROOT "%s"}, + {Opt_fstransmute, SMK_FSTRANS "%s"}, + {Opt_error, NULL}, +}; + static void smk_bu_mode(int mode, char *s) { int i = 0; @@ -577,76 +587,193 @@ static int smack_sb_copy_data(char *orig, char *smackopts) } /** - * smack_sb_kern_mount - Smack specific mount processing + * smack_parse_opts_str - parse Smack specific mount options + * @options: mount options string + * @opts: where to store converted mount opts + * + * Returns 0 on success or -ENOMEM on error. 
+ * + * converts Smack specific mount options to generic security option format + */ +static int smack_parse_opts_str(char *options, + struct security_mnt_opts *opts) +{ + char *p; + char *fsdefault = NULL, *fsfloor = NULL; + char *fshat = NULL, *fsroot = NULL, *fstransmute = NULL; + int rc = -ENOMEM, num_mnt_opts = 0; + + opts->num_mnt_opts = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + substring_t args[MAX_OPT_ARGS]; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + + switch (token) { + case Opt_fsdefault: + if (fsdefault) + goto out_opt_err; + fsdefault = match_strdup(&args[0]); + if (!fsdefault) + goto out_err; + break; + case Opt_fsfloor: + if (fsfloor) + goto out_opt_err; + fsfloor = match_strdup(&args[0]); + if (!fsfloor) + goto out_err; + break; + case Opt_fshat: + if (fshat) + goto out_opt_err; + fshat = match_strdup(&args[0]); + if (!fshat) + goto out_err; + break; + case Opt_fsroot: + if (fsroot) + goto out_opt_err; + fsroot = match_strdup(&args[0]); + if (!fsroot) + goto out_err; + break; + case Opt_fstransmute: + if (fstransmute) + goto out_opt_err; + fstransmute = match_strdup(&args[0]); + if (!fstransmute) + goto out_err; + break; + default: + rc = -EINVAL; + pr_warn("Smack: unknown mount option\n"); + goto out_err; + } + } + + opts->mnt_opts = kcalloc(NUM_SMK_MNT_OPTS, sizeof(char *), GFP_ATOMIC); + if (!opts->mnt_opts) + goto out_err; + + opts->mnt_opts_flags = kcalloc(NUM_SMK_MNT_OPTS, sizeof(int), + GFP_ATOMIC); + if (!opts->mnt_opts_flags) { + kfree(opts->mnt_opts); + goto out_err; + } + + if (fsdefault) { + opts->mnt_opts[num_mnt_opts] = fsdefault; + opts->mnt_opts_flags[num_mnt_opts++] = FSDEFAULT_MNT; + } + if (fsfloor) { + opts->mnt_opts[num_mnt_opts] = fsfloor; + opts->mnt_opts_flags[num_mnt_opts++] = FSFLOOR_MNT; + } + if (fshat) { + opts->mnt_opts[num_mnt_opts] = fshat; + opts->mnt_opts_flags[num_mnt_opts++] = FSHAT_MNT; + } + if (fsroot) { + opts->mnt_opts[num_mnt_opts] = fsroot; + opts->mnt_opts_flags[num_mnt_opts++] = FSROOT_MNT; + } + if (fstransmute) { + opts->mnt_opts[num_mnt_opts] = fstransmute; + opts->mnt_opts_flags[num_mnt_opts++] = FSTRANS_MNT; + } + + opts->num_mnt_opts = num_mnt_opts; + return 0; + +out_opt_err: + rc = -EINVAL; + pr_warn("Smack: duplicate mount options\n"); + +out_err: + kfree(fsdefault); + kfree(fsfloor); + kfree(fshat); + kfree(fsroot); + kfree(fstransmute); + return rc; +} + +/** + * smack_set_mnt_opts - set Smack specific mount options * @sb: the file system superblock - * @flags: the mount flags - * @data: the smack mount options + * @opts: Smack mount options + * @kern_flags: mount option from kernel space or user space + * @set_kern_flags: where to store converted mount opts * * Returns 0 on success, an error code on failure + * + * Allow filesystems with binary mount data to explicitly set Smack mount + * labels. 
*/ -static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) +static int smack_set_mnt_opts(struct super_block *sb, + struct security_mnt_opts *opts, + unsigned long kern_flags, + unsigned long *set_kern_flags) { struct dentry *root = sb->s_root; struct inode *inode = d_backing_inode(root); struct superblock_smack *sp = sb->s_security; struct inode_smack *isp; struct smack_known *skp; - char *op; - char *commap; + int i; + int num_opts = opts->num_mnt_opts; int transmute = 0; - int specified = 0; if (sp->smk_initialized) return 0; sp->smk_initialized = 1; - for (op = data; op != NULL; op = commap) { - commap = strchr(op, ','); - if (commap != NULL) - *commap++ = '\0'; - - if (strncmp(op, SMK_FSHAT, strlen(SMK_FSHAT)) == 0) { - op += strlen(SMK_FSHAT); - skp = smk_import_entry(op, 0); + for (i = 0; i < num_opts; i++) { + switch (opts->mnt_opts_flags[i]) { + case FSDEFAULT_MNT: + skp = smk_import_entry(opts->mnt_opts[i], 0); if (IS_ERR(skp)) return PTR_ERR(skp); - sp->smk_hat = skp; - specified = 1; - - } else if (strncmp(op, SMK_FSFLOOR, strlen(SMK_FSFLOOR)) == 0) { - op += strlen(SMK_FSFLOOR); - skp = smk_import_entry(op, 0); + sp->smk_default = skp; + break; + case FSFLOOR_MNT: + skp = smk_import_entry(opts->mnt_opts[i], 0); if (IS_ERR(skp)) return PTR_ERR(skp); sp->smk_floor = skp; - specified = 1; - - } else if (strncmp(op, SMK_FSDEFAULT, - strlen(SMK_FSDEFAULT)) == 0) { - op += strlen(SMK_FSDEFAULT); - skp = smk_import_entry(op, 0); + break; + case FSHAT_MNT: + skp = smk_import_entry(opts->mnt_opts[i], 0); if (IS_ERR(skp)) return PTR_ERR(skp); - sp->smk_default = skp; - specified = 1; - - } else if (strncmp(op, SMK_FSROOT, strlen(SMK_FSROOT)) == 0) { - op += strlen(SMK_FSROOT); - skp = smk_import_entry(op, 0); + sp->smk_hat = skp; + break; + case FSROOT_MNT: + skp = smk_import_entry(opts->mnt_opts[i], 0); if (IS_ERR(skp)) return PTR_ERR(skp); sp->smk_root = skp; - specified = 1; - - } else if (strncmp(op, SMK_FSTRANS, strlen(SMK_FSTRANS)) == 0) { - op += strlen(SMK_FSTRANS); - skp = smk_import_entry(op, 0); + break; + case FSTRANS_MNT: + skp = smk_import_entry(opts->mnt_opts[i], 0); if (IS_ERR(skp)) return PTR_ERR(skp); sp->smk_root = skp; transmute = 1; - specified = 1; + break; + default: + break; } } @@ -654,7 +781,7 @@ static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) /* * Unprivileged mounts don't get to specify Smack values. */ - if (specified) + if (num_opts) return -EPERM; /* * Unprivileged mounts get root and default from the caller. @@ -663,6 +790,7 @@ static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) sp->smk_root = skp; sp->smk_default = skp; } + /* * Initialize the root inode. 
*/ @@ -681,6 +809,37 @@ static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) return 0; } +/** + * smack_sb_kern_mount - Smack specific mount processing + * @sb: the file system superblock + * @flags: the mount flags + * @data: the smack mount options + * + * Returns 0 on success, an error code on failure + */ +static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) +{ + int rc = 0; + char *options = data; + struct security_mnt_opts opts; + + security_init_mnt_opts(&opts); + + if (!options) + goto out; + + rc = smack_parse_opts_str(options, &opts); + if (rc) + goto out_err; + +out: + rc = smack_set_mnt_opts(sb, &opts, 0, NULL); + +out_err: + security_free_mnt_opts(&opts); + return rc; +} + /** * smack_sb_statfs - Smack check on statfs * @dentry: identifies the file system in question @@ -4264,6 +4423,8 @@ struct security_hook_list smack_hooks[] = { LSM_HOOK_INIT(sb_copy_data, smack_sb_copy_data), LSM_HOOK_INIT(sb_kern_mount, smack_sb_kern_mount), LSM_HOOK_INIT(sb_statfs, smack_sb_statfs), + LSM_HOOK_INIT(sb_set_mnt_opts, smack_set_mnt_opts), + LSM_HOOK_INIT(sb_parse_opts_str, smack_parse_opts_str), LSM_HOOK_INIT(bprm_set_creds, smack_bprm_set_creds), LSM_HOOK_INIT(bprm_committing_creds, smack_bprm_committing_creds), From ca70d27e445fe721587598030b97357b35f61913 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Wed, 24 Jun 2015 07:41:07 +0800 Subject: [PATCH 049/734] sysfs: fix simple_return.cocci warnings security/smack/smackfs.c:2251:1-4: WARNING: end returns can be simpified and declaration on line 2250 can be dropped Simplify a trivial if-return sequence. Possibly combine with a preceding function call. Generated by: scripts/coccinelle/misc/simple_return.cocci Signed-off-by: Fengguang Wu Acked-by: Serge Hallyn Acked-by: Casey Schaufler --- security/smack/smackfs.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 2716d02119f3e8..81a2888a990863 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2320,11 +2320,7 @@ static const struct file_operations smk_revoke_subj_ops = { */ static int smk_init_sysfs(void) { - int err; - err = sysfs_create_mount_point(fs_kobj, "smackfs"); - if (err) - return err; - return 0; + return sysfs_create_mount_point(fs_kobj, "smackfs"); } /** From 7deef550f3a7d44c1d52a6d54f824e7e180c08ae Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 22 Jul 2015 18:09:10 -0600 Subject: [PATCH 050/734] toshiba_acpi: Adapt /proc/acpi/toshiba/keys to TOS1900 devices Since the introduction of TOS1900 device support to the driver, the "keys" entry under the proc directory has been broken, because it only handled TOS620X devices. This patch adapts the code to show the hotkey values of TOS1900 devices too, so that any programs still using that interface get working hotkey reporting on these devices.
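For illustration only (the sample values are hypothetical), a read of /proc/acpi/toshiba/keys after this change produces the two fields emitted by keys_proc_show() on both TOS620X and TOS1900 machines:

	hotkey_ready: 1
	hotkey: 0x013b

The format strings come straight from the seq_printf() calls in the diff below; only the sample values are invented.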
Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba_acpi.c | 56 +++++++++++------------------ 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 3ad7b1fa24ce54..c3a0c4d0c1dc4e 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -1499,32 +1499,10 @@ static const struct file_operations fan_proc_fops = { static int keys_proc_show(struct seq_file *m, void *v) { struct toshiba_acpi_dev *dev = m->private; - u32 hci_result; - u32 value; - - if (!dev->key_event_valid && dev->system_event_supported) { - hci_result = hci_read(dev, HCI_SYSTEM_EVENT, &value); - if (hci_result == TOS_SUCCESS) { - dev->key_event_valid = 1; - dev->last_key_event = value; - } else if (hci_result == TOS_FIFO_EMPTY) { - /* Better luck next time */ - } else if (hci_result == TOS_NOT_SUPPORTED) { - /* - * This is a workaround for an unresolved issue on - * some machines where system events sporadically - * become disabled. - */ - hci_result = hci_write(dev, HCI_SYSTEM_EVENT, 1); - pr_notice("Re-enabled hotkeys\n"); - } else { - pr_err("Error reading hotkey status\n"); - return -EIO; - } - } seq_printf(m, "hotkey_ready: %d\n", dev->key_event_valid); seq_printf(m, "hotkey: 0x%04x\n", dev->last_key_event); + return 0; } @@ -2361,22 +2339,28 @@ static void toshiba_acpi_report_hotkey(struct toshiba_acpi_dev *dev, static void toshiba_acpi_process_hotkeys(struct toshiba_acpi_dev *dev) { - u32 hci_result, value; - int retries = 3; - int scancode; - if (dev->info_supported) { - scancode = toshiba_acpi_query_hotkey(dev); - if (scancode < 0) + int scancode = toshiba_acpi_query_hotkey(dev); + + if (scancode < 0) { pr_err("Failed to query hotkey event\n"); - else if (scancode != 0) + } else if (scancode != 0) { toshiba_acpi_report_hotkey(dev, scancode); + dev->key_event_valid = 1; + dev->last_key_event = scancode; + } } else if (dev->system_event_supported) { + u32 result; + u32 value; + int retries = 3; + do { - hci_result = hci_read(dev, HCI_SYSTEM_EVENT, &value); - switch (hci_result) { + result = hci_read(dev, HCI_SYSTEM_EVENT, &value); + switch (result) { case TOS_SUCCESS: toshiba_acpi_report_hotkey(dev, (int)value); + dev->key_event_valid = 1; + dev->last_key_event = value; break; case TOS_NOT_SUPPORTED: /* @@ -2384,15 +2368,15 @@ static void toshiba_acpi_process_hotkeys(struct toshiba_acpi_dev *dev) * issue on some machines where system events * sporadically become disabled. */ - hci_result = - hci_write(dev, HCI_SYSTEM_EVENT, 1); - pr_notice("Re-enabled hotkeys\n"); + result = hci_write(dev, HCI_SYSTEM_EVENT, 1); + if (result == TOS_SUCCESS) + pr_notice("Re-enabled hotkeys\n"); /* Fall through */ default: retries--; break; } - } while (retries && hci_result != TOS_FIFO_EMPTY); + } while (retries && result != TOS_FIFO_EMPTY); } } From fc5462f8525b47fa219452289ecb22c921c16823 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 22 Jul 2015 18:09:11 -0600 Subject: [PATCH 051/734] toshiba_acpi: Add /dev/toshiba_acpi device There were previous attempts to "merge" the toshiba SMM module to the toshiba_acpi one, they were trying to imitate what the old toshiba module does, however, some models (TOS1900 devices) come with a "crippled" implementation and do not provide all the "features" a "genuine" Toshiba BIOS does. This patch adds a new device called toshiba_acpi, which aim is to enable userspace to access the SMM on Toshiba laptops via ACPI calls. 
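As a rough sketch of the intended userspace usage of the new device node and the convenience SCI ioctl described below (the SCI function code in ebx is hypothetical; the device path, ioctl name, and the SCI_GET value 0xf300 come from this patch):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/toshiba.h>

	int main(void)
	{
		/* eax selects SCI_GET (0xf300); ebx is a made-up function code */
		SMMRegisters regs = { .eax = 0xf300, .ebx = 0x0000 };
		int fd = open(TOSHIBA_ACPI_DEVICE, O_RDWR);

		if (fd < 0)
			return 1;
		/* the driver opens/closes the SCI around the TCI call itself */
		if (ioctl(fd, TOSHIBA_ACPI_SCI, &regs) == 0)
			printf("eax=0x%08x ecx=0x%08x\n", regs.eax, regs.ecx);
		close(fd);
		return 0;
	}
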
A new convenience _IOWR command is created to access the SCI functions; it opens and closes the SCI internally to guard against buggy BIOSes, while at the same time providing backwards compatibility. Older programs (and new ones) that wish to access the SMM on newer models can do so by pointing their path at /dev/toshiba_acpi (instead of /dev/toshiba), as the toshiba.h header was modified to reflect these changes and to add all the toshiba_acpi paths and commands. However, it is strongly recommended to use the new IOCTL for any SCI command, to stay clear of buggy BIOSes. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- Documentation/ioctl/ioctl-number.txt | 2 +- drivers/platform/x86/toshiba_acpi.c | 91 ++++++++++++++++++++++++++++ include/uapi/linux/toshiba.h | 32 +++++++++- 3 files changed, 121 insertions(+), 4 deletions(-) diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 611c52267d2481..21d2f27c886b4d 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -263,7 +263,7 @@ Code Seq#(hex) Include File Comments 's' all linux/cdk.h 't' 00-7F linux/ppp-ioctl.h 't' 80-8F linux/isdn_ppp.h -'t' 90 linux/toshiba.h +'t' 90-91 linux/toshiba.h toshiba and toshiba_acpi SMM 'u' 00-1F linux/smb_fs.h gone 'u' 20-3F linux/uvcvideo.h USB video class host driver 'v' 00-1F linux/ext2_fs.h conflict! diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index c3a0c4d0c1dc4e..802577f43a2337 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -50,6 +50,8 @@ #include #include #include +#include <linux/miscdevice.h> +#include <linux/toshiba.h> #include MODULE_AUTHOR("John Belmonte"); @@ -170,6 +172,7 @@ struct toshiba_acpi_dev { struct led_classdev led_dev; struct led_classdev kbd_led; struct led_classdev eco_led; + struct miscdevice miscdev; int force_fan; int last_key_event; @@ -2239,6 +2242,81 @@ static struct attribute_group toshiba_attr_group = { .attrs = toshiba_attributes, }; +/* + * Misc device + */ +static int toshiba_acpi_smm_bridge(SMMRegisters *regs) +{ + u32 in[TCI_WORDS] = { regs->eax, regs->ebx, regs->ecx, + regs->edx, regs->esi, regs->edi }; + u32 out[TCI_WORDS]; + acpi_status status; + + status = tci_raw(toshiba_acpi, in, out); + if (ACPI_FAILURE(status)) { + pr_err("ACPI call to query SMM registers failed\n"); + return -EIO; + } + + /* Fill out the SMM struct with the TCI call results */ + regs->eax = out[0]; + regs->ebx = out[1]; + regs->ecx = out[2]; + regs->edx = out[3]; + regs->esi = out[4]; + regs->edi = out[5]; + + return 0; +} + +static long toshiba_acpi_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg) +{ + SMMRegisters __user *argp = (SMMRegisters __user *)arg; + SMMRegisters regs; + int ret; + + if (!argp) + return -EINVAL; + + switch (cmd) { + case TOSH_SMM: + if (copy_from_user(&regs, argp, sizeof(SMMRegisters))) + return -EFAULT; + ret = toshiba_acpi_smm_bridge(&regs); + if (ret) + return ret; + if (copy_to_user(argp, &regs, sizeof(SMMRegisters))) + return -EFAULT; + break; + case TOSHIBA_ACPI_SCI: + if (copy_from_user(&regs, argp, sizeof(SMMRegisters))) + return -EFAULT; + /* Ensure we are being called with a SCI_{GET, SET} register */ + if (regs.eax != SCI_GET && regs.eax != SCI_SET) + return -EINVAL; + if (!sci_open(toshiba_acpi)) + return -EIO; + ret = toshiba_acpi_smm_bridge(&regs); + sci_close(toshiba_acpi); + if (ret) + return ret; + if (copy_to_user(argp, &regs, sizeof(SMMRegisters))) + return -EFAULT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static
const struct file_operations toshiba_acpi_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = toshiba_acpi_ioctl, + .llseek = noop_llseek, +}; + /* * Hotkeys */ @@ -2540,6 +2618,8 @@ static int toshiba_acpi_remove(struct acpi_device *acpi_dev) { struct toshiba_acpi_dev *dev = acpi_driver_data(acpi_dev); + misc_deregister(&dev->miscdev); + remove_toshiba_proc_entries(dev); if (dev->sysfs_created) @@ -2611,6 +2691,17 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev) return -ENOMEM; dev->acpi_dev = acpi_dev; dev->method_hci = hci_method; + dev->miscdev.minor = MISC_DYNAMIC_MINOR; + dev->miscdev.name = "toshiba_acpi"; + dev->miscdev.fops = &toshiba_acpi_fops; + + ret = misc_register(&dev->miscdev); + if (ret) { + pr_err("Failed to register miscdevice\n"); + kfree(dev); + return ret; + } + acpi_dev->driver_data = dev; dev_set_drvdata(&acpi_dev->dev, dev); diff --git a/include/uapi/linux/toshiba.h b/include/uapi/linux/toshiba.h index e9bef5b2f91ebf..c58bf4b5bb2664 100644 --- a/include/uapi/linux/toshiba.h +++ b/include/uapi/linux/toshiba.h @@ -1,6 +1,7 @@ /* toshiba.h -- Linux driver for accessing the SMM on Toshiba laptops * * Copyright (c) 1996-2000 Jonathan A. Buzzard (jonathan@buzzard.org.uk) + * Copyright (c) 2015 Azael Avalos * * Thanks to Juergen Heinzl for the pointers * on making sure the structure is aligned and packed. @@ -20,9 +21,18 @@ #ifndef _UAPI_LINUX_TOSHIBA_H #define _UAPI_LINUX_TOSHIBA_H -#define TOSH_PROC "/proc/toshiba" -#define TOSH_DEVICE "/dev/toshiba" -#define TOSH_SMM _IOWR('t', 0x90, int) /* broken: meant 24 bytes */ +/* + * Toshiba modules paths + */ + +#define TOSH_PROC "/proc/toshiba" +#define TOSH_DEVICE "/dev/toshiba" +#define TOSHIBA_ACPI_PROC "/proc/acpi/toshiba" +#define TOSHIBA_ACPI_DEVICE "/dev/toshiba_acpi" + +/* + * Toshiba SMM structure + */ typedef struct { unsigned int eax; @@ -33,5 +43,21 @@ typedef struct { unsigned int edi __attribute__ ((packed)); } SMMRegisters; +/* + * IOCTLs (0x90 - 0x91) + */ + +#define TOSH_SMM _IOWR('t', 0x90, SMMRegisters) +/* + * Convenience toshiba_acpi command. + * + * The System Configuration Interface (SCI) is opened/closed internally + * to avoid userspace of buggy BIOSes. + * + * The toshiba_acpi module checks whether the eax register is set with + * SCI_GET (0xf300) or SCI_SET (0xf400), returning -EINVAL if not. + */ +#define TOSHIBA_ACPI_SCI _IOWR('t', 0x91, SMMRegisters) + #endif /* _UAPI_LINUX_TOSHIBA_H */ From a88bc06e5aec4984f5bf01c6d410a0939134f737 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 22 Jul 2015 18:09:12 -0600 Subject: [PATCH 052/734] toshiba_acpi: Avoid registering input device on WMI event laptops Commit f11f999e9890 ("toshiba_acpi: Refuse to load on machines with buggy INFO implementations") denied loading on laptops with a WMI Event GUID given that such laptops manage the hotkeys via that interface, however, such laptops have a working Toshiba Configuration Interface (TCI), and thus, such commit denied several supported features. This patch avoids registering the input device and ignores all hotkey events on laptops with such WMI Event GUID, making the supported features found in those laptops to work. 
Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba_acpi.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 802577f43a2337..48b16b323c8904 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2466,6 +2466,11 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev) u32 hci_result; int error; + if (wmi_has_guid(TOSHIBA_WMI_EVENT_GUID)) { + pr_info("WMI event detected, hotkeys will not be monitored\n"); + return 0; + } + error = toshiba_acpi_enable_hotkeys(dev); if (error) return error; @@ -2813,6 +2818,14 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event) switch (event) { case 0x80: /* Hotkeys and some system events */ + /* + * Machines with this WMI GUID aren't supported due to bugs in + * their AML. + * + * Return silently to avoid triggering a netlink event. + */ + if (wmi_has_guid(TOSHIBA_WMI_EVENT_GUID)) + return; toshiba_acpi_process_hotkeys(dev); break; case 0x81: /* Dock events */ @@ -2899,14 +2912,6 @@ static int __init toshiba_acpi_init(void) { int ret; - /* - * Machines with this WMI guid aren't supported due to bugs in - * their AML. This check relies on wmi initializing before - * toshiba_acpi to guarantee guids have been identified. - */ - if (wmi_has_guid(TOSHIBA_WMI_EVENT_GUID)) - return -ENODEV; - toshiba_proc_dir = proc_mkdir(PROC_TOSHIBA, acpi_root_dir); if (!toshiba_proc_dir) { pr_err("Unable to create proc dir " PROC_TOSHIBA "\n"); From 695f6060903cefa08ffb78433136f51ac0f94488 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 22 Jul 2015 18:09:13 -0600 Subject: [PATCH 053/734] toshiba_acpi: Transflective backlight updates This patch changes the tr function second parameter from bool to u32, to be on par with the rest of the TCI functions of the driver, and the code was updated accordingly. Also, the check for translective support was moved to the *add function, as the {__get, set}_lcd_brightness functions make use of it. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba_acpi.c | 30 +++++++++++------------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 48b16b323c8904..649786de4a79d7 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -1187,22 +1187,17 @@ static int toshiba_hotkey_event_type_get(struct toshiba_acpi_dev *dev, } /* Transflective Backlight */ -static int get_tr_backlight_status(struct toshiba_acpi_dev *dev, bool *enabled) +static int get_tr_backlight_status(struct toshiba_acpi_dev *dev, u32 *status) { - u32 hci_result; - u32 status; + u32 hci_result = hci_read(dev, HCI_TR_BACKLIGHT, status); - hci_result = hci_read(dev, HCI_TR_BACKLIGHT, &status); - *enabled = !status; return hci_result == TOS_SUCCESS ? 0 : -EIO; } -static int set_tr_backlight_status(struct toshiba_acpi_dev *dev, bool enable) +static int set_tr_backlight_status(struct toshiba_acpi_dev *dev, u32 status) { - u32 hci_result; - u32 value = !enable; + u32 hci_result = hci_write(dev, HCI_TR_BACKLIGHT, !status); - hci_result = hci_write(dev, HCI_TR_BACKLIGHT, value); return hci_result == TOS_SUCCESS ? 
0 : -EIO; } @@ -1216,12 +1211,11 @@ static int __get_lcd_brightness(struct toshiba_acpi_dev *dev) int brightness = 0; if (dev->tr_backlight_supported) { - bool enabled; - int ret = get_tr_backlight_status(dev, &enabled); + int ret = get_tr_backlight_status(dev, &value); if (ret) return ret; - if (enabled) + if (value) return 0; brightness++; } @@ -1271,8 +1265,7 @@ static int set_lcd_brightness(struct toshiba_acpi_dev *dev, int value) u32 hci_result; if (dev->tr_backlight_supported) { - bool enable = !value; - int ret = set_tr_backlight_status(dev, enable); + int ret = set_tr_backlight_status(dev, !value); if (ret) return ret; @@ -2563,7 +2556,6 @@ static int toshiba_acpi_setup_backlight(struct toshiba_acpi_dev *dev) struct backlight_properties props; int brightness; int ret; - bool enabled; /* * Some machines don't support the backlight methods at all, and @@ -2580,10 +2572,6 @@ static int toshiba_acpi_setup_backlight(struct toshiba_acpi_dev *dev) return 0; } - /* Determine whether or not BIOS supports transflective backlight */ - ret = get_tr_backlight_status(dev, &enabled); - dev->tr_backlight_supported = !ret; - /* * Tell acpi-video-detect code to prefer vendor backlight on all * systems with transflective backlight and on dmi matched systems. @@ -2723,6 +2711,10 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev) if (toshiba_acpi_setup_keyboard(dev)) pr_info("Unable to activate hotkeys\n"); + /* Determine whether or not BIOS supports transflective backlight */ + ret = get_tr_backlight_status(dev, &dummy); + dev->tr_backlight_supported = !ret; + ret = toshiba_acpi_setup_backlight(dev); if (ret) goto error; From d7e4f2e2ca392bce468718bcbba808108d81d501 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 22 Jul 2015 19:37:47 -0600 Subject: [PATCH 054/734] toshiba_acpi: Remove unused wireless defines Commit 2b74103547b4 ("toshiba_acpi: Remove bluetooth rfkill code") removed bluetooth related code, however, the wireless defines were not removed and are unused. This patch simply removes those defines as there is no code using them. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba_acpi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 649786de4a79d7..90d8cb1c2e27ad 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -113,7 +113,6 @@ MODULE_LICENSE("GPL"); #define HCI_VIDEO_OUT 0x001c #define HCI_HOTKEY_EVENT 0x001e #define HCI_LCD_BRIGHTNESS 0x002a -#define HCI_WIRELESS 0x0056 #define HCI_ACCELEROMETER 0x006d #define HCI_KBD_ILLUMINATION 0x0095 #define HCI_ECO_MODE 0x0097 @@ -142,10 +141,6 @@ MODULE_LICENSE("GPL"); #define HCI_VIDEO_OUT_LCD 0x1 #define HCI_VIDEO_OUT_CRT 0x2 #define HCI_VIDEO_OUT_TV 0x4 -#define HCI_WIRELESS_KILL_SWITCH 0x01 -#define HCI_WIRELESS_BT_PRESENT 0x0f -#define HCI_WIRELESS_BT_ATTACH 0x40 -#define HCI_WIRELESS_BT_POWER 0x80 #define SCI_KBD_MODE_MASK 0x1f #define SCI_KBD_MODE_FNZ 0x1 #define SCI_KBD_MODE_AUTO 0x2 From d50c9005d32b4eda6e11f7ec4f1b00a93088e0ca Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 22 Jul 2015 19:37:46 -0600 Subject: [PATCH 055/734] toshiba_acpi: Reorder toshiba_acpi_alt_keymap entries This patch simply reorders the entries found in the new keymap by ascending order, this is simply a cosmetic change, no functionality was modified. 
Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba_acpi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 90d8cb1c2e27ad..6013a11caeea81 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -246,16 +246,16 @@ static const struct key_entry toshiba_acpi_keymap[] = { }; static const struct key_entry toshiba_acpi_alt_keymap[] = { - { KE_KEY, 0x157, { KEY_MUTE } }, { KE_KEY, 0x102, { KEY_ZOOMOUT } }, { KE_KEY, 0x103, { KEY_ZOOMIN } }, { KE_KEY, 0x12c, { KEY_KBDILLUMTOGGLE } }, { KE_KEY, 0x139, { KEY_ZOOMRESET } }, - { KE_KEY, 0x13e, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 0x13c, { KEY_BRIGHTNESSDOWN } }, { KE_KEY, 0x13d, { KEY_BRIGHTNESSUP } }, - { KE_KEY, 0x158, { KEY_WLAN } }, + { KE_KEY, 0x13e, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 0x13f, { KEY_TOUCHPAD_TOGGLE } }, + { KE_KEY, 0x157, { KEY_MUTE } }, + { KE_KEY, 0x158, { KEY_WLAN } }, { KE_END, 0 }, }; From 1e574dbfadafd9fd1f2a414efb731d7538277e71 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 22 Jul 2015 19:37:49 -0600 Subject: [PATCH 056/734] toshiba_acpi: Change some variables to avoid warnings from ninja-check This patch changes some variables to avoid warnings from ninja-check. We are basically moving some variables inside the conditionals where such variables are being used, and we are checking the returned values of some others. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba_acpi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 6013a11caeea81..3bfdfddc38ac3d 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -1651,7 +1651,6 @@ static ssize_t kbd_backlight_mode_store(struct device *dev, { struct toshiba_acpi_dev *toshiba = dev_get_drvdata(dev); int mode; - int time; int ret; @@ -1682,7 +1681,7 @@ static ssize_t kbd_backlight_mode_store(struct device *dev, /* Only make a change if the actual mode has changed */ if (toshiba->kbd_mode != mode) { /* Shift the time to "base time" (0x3c0000 == 60 seconds) */ - time = toshiba->kbd_time << HCI_MISC_SHIFT; + int time = toshiba->kbd_time << HCI_MISC_SHIFT; /* OR the "base time" to the actual method format */ if (toshiba->kbd_type == 1) { @@ -2856,10 +2855,14 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event) static int toshiba_acpi_suspend(struct device *device) { struct toshiba_acpi_dev *dev = acpi_driver_data(to_acpi_device(device)); - u32 result; - if (dev->hotkey_dev) + if (dev->hotkey_dev) { + u32 result; + result = hci_write(dev, HCI_HOTKEY_EVENT, HCI_HOTKEY_DISABLE); + if (result != TOS_SUCCESS) + pr_info("Unable to disable hotkeys\n"); + } return 0; } @@ -2867,10 +2870,10 @@ static int toshiba_acpi_suspend(struct device *device) static int toshiba_acpi_resume(struct device *device) { struct toshiba_acpi_dev *dev = acpi_driver_data(to_acpi_device(device)); - int error; if (dev->hotkey_dev) { - error = toshiba_acpi_enable_hotkeys(dev); + int error = toshiba_acpi_enable_hotkeys(dev); + if (error) pr_info("Unable to re-enable hotkeys\n"); } From 5e32940621eb62064d98f42c9889db71b0368bde Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 11 Jul 2015 10:02:46 -0400 Subject: [PATCH 057/734] libnvdimm, btt: sparse fix Fix: drivers/nvdimm/btt.c:635:29: warning: restricted __le64 degrades to 
integer Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 411c7b2bb37aec..552f1c4f4dc6cc 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -632,8 +632,9 @@ static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, arena->logoff = arena_off + le64_to_cpu(super->logoff); arena->info2off = arena_off + le64_to_cpu(super->info2off); - arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) : - (arena->info2off - arena->infooff + BTT_PG_SIZE); + arena->size = (le64_to_cpu(super->nextoff) > 0) + ? (le64_to_cpu(super->nextoff)) + : (arena->info2off - arena->infooff + BTT_PG_SIZE); arena->flags = le32_to_cpu(super->flags); } From ec92777f2ba93c00387b8fe53780c25adc57c744 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 9 Jul 2015 13:25:35 -0600 Subject: [PATCH 058/734] libnvdimm: Update name of the ars_status_record mask field The spec suggests that this is a simple 'length' field, not a mask. Update the name accordingly. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 2b94ea2287bb92..e94bc20016b2d2 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -87,7 +87,7 @@ struct nd_cmd_ars_status { __u32 handle; __u32 flags; __u64 err_address; - __u64 mask; + __u64 length; } __packed records[0]; } __packed; From 39c686b862cdb2049b90e095b6c6c727b2a7ab60 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 9 Jul 2015 13:25:36 -0600 Subject: [PATCH 059/734] libnvdimm: Add DSM support for Address Range Scrub commands Add support for the three ARS DSM commands: - Query ARS Capabilities - Queries the firmware to check if a given range supports scrub, and if so, which type (persistent vs. volatile) - Start ARS - Starts a scrub for a given range/type - Query ARS Status - Checks status of a previously started scrub, and provides the error logs if any. The commands are described by the example DSM spec at: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf Also add these commands to the nfit_test test framework, and return canned data. 
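The status word these commands return packs two fields, per the ars_masks enum added to ndctl.h below. A minimal decode sketch (the helper names are illustrative, not part of the patch):

	/* low 16 bits: command status; high 16 bits: extended status */
	static inline u16 ars_status(u32 status)
	{
		return status & ARS_STATUS_MASK;
	}

	static inline u16 ars_ext_status(u32 status)
	{
		return status >> ARS_EXT_STATUS_SHIFT;
	}

In the nfit_test stub, for example, the ARS capability handler reports both scrub types by setting the extended status to (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16.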
Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 1 + drivers/acpi/nfit.h | 1 + include/uapi/linux/ndctl.h | 10 ++ tools/testing/nvdimm/test/nfit.c | 199 ++++++++++++++++++++++--------- 4 files changed, 152 insertions(+), 59 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 628a42c41ab126..ef8a664db25412 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -868,6 +868,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) struct acpi_device *adev; int i; + nd_desc->dsm_mask = acpi_desc->bus_dsm_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return; diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 79b6d83875c1de..f2c2bb751882c3 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -107,6 +107,7 @@ struct acpi_nfit_desc { struct nvdimm_bus *nvdimm_bus; struct device *dev; unsigned long dimm_dsm_force_en; + unsigned long bus_dsm_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index e94bc20016b2d2..5b4a4be06e2b93 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -111,6 +111,11 @@ enum { ND_CMD_VENDOR = 9, }; +enum { + ND_ARS_VOLATILE = 1, + ND_ARS_PERSISTENT = 2, +}; + static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { static const char * const names[] = { @@ -194,4 +199,9 @@ enum nd_driver_flags { enum { ND_MIN_NAMESPACE_SIZE = 0x00400000, }; + +enum ars_masks { + ARS_STATUS_MASK = 0x0000FFFF, + ARS_EXT_STATUS_SHIFT = 16, +}; #endif /* __NDCTL_H__ */ diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index d0bdae40ccc903..28dba918524e5b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -147,75 +147,153 @@ static struct nfit_test *to_nfit_test(struct device *dev) return container_of(pdev, struct nfit_test, pdev); } +static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + nd_cmd->config_size = LABEL_SIZE; + nd_cmd->max_xfer = SZ_4K; + + return 0; +} + +static int nfit_test_cmd_get_config_data(struct nd_cmd_get_config_data_hdr + *nd_cmd, unsigned int buf_len, void *label) +{ + unsigned int len, offset = nd_cmd->in_offset; + int rc; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len) + return -EINVAL; + + nd_cmd->status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(nd_cmd->out_buf, label + offset, len); + rc = buf_len - sizeof(*nd_cmd) - len; + + return rc; +} + +static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd, + unsigned int buf_len, void *label) +{ + unsigned int len, offset = nd_cmd->in_offset; + u32 *status; + int rc; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len) + return -EINVAL; + + status = (void *)nd_cmd + nd_cmd->in_length + sizeof(*nd_cmd); + *status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(label + offset, nd_cmd->in_buf, len); + rc = buf_len - sizeof(*nd_cmd) - (len + 4); + + return rc; +} + +static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + 
nd_cmd->max_ars_out = 256; + nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; + + return 0; +} + +static int nfit_test_cmd_ars_start(struct nd_cmd_ars_start *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + + return 0; +} + +static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->out_length = 256; + nd_cmd->num_records = 0; + nd_cmd->status = 0; + + return 0; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); - struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); - int i, rc; + int i, rc = 0; + + if (nvdimm) { + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); - if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) - return -ENOTTY; + if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) + return -ENOTTY; - /* lookup label space for the given dimm */ - for (i = 0; i < ARRAY_SIZE(handle); i++) - if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) + /* lookup label space for the given dimm */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == + handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: + rc = nfit_test_cmd_get_config_size(buf, buf_len); break; - if (i >= ARRAY_SIZE(handle)) - return -ENXIO; + case ND_CMD_GET_CONFIG_DATA: + rc = nfit_test_cmd_get_config_data(buf, buf_len, + t->label[i]); + break; + case ND_CMD_SET_CONFIG_DATA: + rc = nfit_test_cmd_set_config_data(buf, buf_len, + t->label[i]); + break; + default: + return -ENOTTY; + } + } else { + if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask)) + return -ENOTTY; - switch (cmd) { - case ND_CMD_GET_CONFIG_SIZE: { - struct nd_cmd_get_config_size *nd_cmd = buf; - - if (buf_len < sizeof(*nd_cmd)) - return -EINVAL; - nd_cmd->status = 0; - nd_cmd->config_size = LABEL_SIZE; - nd_cmd->max_xfer = SZ_4K; - rc = 0; - break; - } - case ND_CMD_GET_CONFIG_DATA: { - struct nd_cmd_get_config_data_hdr *nd_cmd = buf; - unsigned int len, offset = nd_cmd->in_offset; - - if (buf_len < sizeof(*nd_cmd)) - return -EINVAL; - if (offset >= LABEL_SIZE) - return -EINVAL; - if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len) - return -EINVAL; - - nd_cmd->status = 0; - len = min(nd_cmd->in_length, LABEL_SIZE - offset); - memcpy(nd_cmd->out_buf, t->label[i] + offset, len); - rc = buf_len - sizeof(*nd_cmd) - len; - break; - } - case ND_CMD_SET_CONFIG_DATA: { - struct nd_cmd_set_config_hdr *nd_cmd = buf; - unsigned int len, offset = nd_cmd->in_offset; - u32 *status; - - if (buf_len < sizeof(*nd_cmd)) - return -EINVAL; - if (offset >= LABEL_SIZE) - return -EINVAL; - if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len) - return -EINVAL; - - status = buf + nd_cmd->in_length + sizeof(*nd_cmd); - *status = 0; - len = min(nd_cmd->in_length, LABEL_SIZE - offset); - memcpy(t->label[i] + offset, nd_cmd->in_buf, len); - rc = buf_len - sizeof(*nd_cmd) - (len + 4); - break; - } - default: - return -ENOTTY; + switch (cmd) { + case ND_CMD_ARS_CAP: + rc = nfit_test_cmd_ars_cap(buf, buf_len); + break; + case ND_CMD_ARS_START: + rc = nfit_test_cmd_ars_start(buf, buf_len); + break; + case ND_CMD_ARS_STATUS: + rc = nfit_test_cmd_ars_status(buf, buf_len); + 
break; + default: + return -ENOTTY; + } } return rc; @@ -876,6 +954,9 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); nd_desc = &acpi_desc->nd_desc; nd_desc->ndctl = nfit_test_ctl; } From 6b47496a6fc81816e7edaf8224dfb88e402a05f5 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 23 Jul 2015 11:58:48 -0600 Subject: [PATCH 060/734] libnvdimm, pmem: Change pmem physical sector size to PAGE_SIZE Based on a patch: c8fa317 brd: Request from fdisk 4k alignment by Boaz Harrosh, allow fdisk to create properly aligned partitions for DAX. This will also cause mkfs.ext4 to emit a warning if using a file system block size of less than PAGE_SIZE. Cc: Dan Williams Cc: Ross Zwisler Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Elliott, Robert Signed-off-by: Vishal Verma Acked-by: Boaz Harrosh Acked-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ade9eb917a4d94..bcf48f1334431b 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -162,6 +162,7 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); + blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); From 60e95f43fc8573e81f54b0c1e0bc542c2260d956 Mon Sep 17 00:00:00 2001 From: Linda Knippers Date: Wed, 22 Jul 2015 16:17:22 -0400 Subject: [PATCH 061/734] nfit: Don't check _STA on NVDIMM devices The _STA only applies to the root device, not the individual NVDIMMS, so don't check here. NVDIMM device state flags are checked elsewhere. Signed-off-by: Linda Knippers Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index ef8a664db25412..7c2638f914a909 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -764,9 +764,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, struct acpi_device *adev, *adev_dimm; struct device *dev = acpi_desc->dev; const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); - unsigned long long sta; - int i, rc = -ENODEV; - acpi_status status; + int i; nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en; adev = to_acpi_dev(acpi_desc); @@ -781,25 +779,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return force_enable_dimms ? 
0 : -ENODEV; } - status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta); - if (status == AE_NOT_FOUND) { - dev_dbg(dev, "%s missing _STA, assuming enabled...\n", - dev_name(&adev_dimm->dev)); - rc = 0; - } else if (ACPI_FAILURE(status)) - dev_err(dev, "%s failed to retrieve_STA, disabling...\n", - dev_name(&adev_dimm->dev)); - else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0) - dev_info(dev, "%s disabled by firmware\n", - dev_name(&adev_dimm->dev)); - else - rc = 0; - for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++) if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); - return force_enable_dimms ? 0 : rc; + return 0; } static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) From 730daa164e7c7e31c08fab940549f4acc3329432 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jul 2015 18:02:48 -0700 Subject: [PATCH 062/734] Yama: remove needless CONFIG_SECURITY_YAMA_STACKED Now that minor LSMs can cleanly stack with major LSMs, remove the unneeded config for Yama to be made to explicitly stack. Just selecting the main Yama CONFIG will allow it to work, regardless of the major LSM. Since distros using Yama are already forcing it to stack, this is effectively a no-op change. Additionally add MAINTAINERS entry. Signed-off-by: Kees Cook Signed-off-by: James Morris --- Documentation/security/Yama.txt | 10 ++++----- MAINTAINERS | 6 +++++ arch/mips/configs/pistachio_defconfig | 1 - include/linux/lsm_hooks.h | 6 +++-- security/Kconfig | 5 ----- security/security.c | 11 +++------ security/yama/Kconfig | 9 +------- security/yama/yama_lsm.c | 32 +++++++++------------------ 8 files changed, 28 insertions(+), 52 deletions(-) diff --git a/Documentation/security/Yama.txt b/Documentation/security/Yama.txt index 227a63f018a27d..d9ee7d7a6c7fda 100644 --- a/Documentation/security/Yama.txt +++ b/Documentation/security/Yama.txt @@ -1,9 +1,7 @@ -Yama is a Linux Security Module that collects a number of system-wide DAC -security protections that are not handled by the core kernel itself. To -select it at boot time, specify "security=yama" (though this will disable -any other LSM). - -Yama is controlled through sysctl in /proc/sys/kernel/yama: +Yama is a Linux Security Module that collects system-wide DAC security +protections that are not handled by the core kernel itself. 
This is +selectable at build-time with CONFIG_SECURITY_YAMA, and can be controlled +at run-time through sysctls in /proc/sys/kernel/yama: - ptrace_scope diff --git a/MAINTAINERS b/MAINTAINERS index a2264167791acd..f8be2f79719758 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9102,6 +9102,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git S: Supported F: security/apparmor/ +YAMA SECURITY MODULE +M: Kees Cook +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git yama/tip +S: Supported +F: security/yama/ + SENSABLE PHANTOM M: Jiri Slaby S: Maintained diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig index 1646cce032c34a..642b50946943cc 100644 --- a/arch/mips/configs/pistachio_defconfig +++ b/arch/mips/configs/pistachio_defconfig @@ -320,7 +320,6 @@ CONFIG_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_YAMA=y -CONFIG_SECURITY_YAMA_STACKED=y CONFIG_DEFAULT_SECURITY_DAC=y CONFIG_CRYPTO_AUTHENC=y CONFIG_CRYPTO_HMAC=y diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9429f054c32396..ec3a6bab29de3a 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1881,8 +1881,10 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, extern int __init security_module_enable(const char *module); extern void __init capability_add_hooks(void); -#ifdef CONFIG_SECURITY_YAMA_STACKED -void __init yama_add_hooks(void); +#ifdef CONFIG_SECURITY_YAMA +extern void __init yama_add_hooks(void); +#else +static inline void __init yama_add_hooks(void) { } #endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/Kconfig b/security/Kconfig index bf4ec46474b631..e45237897b435f 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -132,7 +132,6 @@ choice default DEFAULT_SECURITY_SMACK if SECURITY_SMACK default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO default DEFAULT_SECURITY_APPARMOR if SECURITY_APPARMOR - default DEFAULT_SECURITY_YAMA if SECURITY_YAMA default DEFAULT_SECURITY_DAC help @@ -151,9 +150,6 @@ choice config DEFAULT_SECURITY_APPARMOR bool "AppArmor" if SECURITY_APPARMOR=y - config DEFAULT_SECURITY_YAMA - bool "Yama" if SECURITY_YAMA=y - config DEFAULT_SECURITY_DAC bool "Unix Discretionary Access Controls" @@ -165,7 +161,6 @@ config DEFAULT_SECURITY default "smack" if DEFAULT_SECURITY_SMACK default "tomoyo" if DEFAULT_SECURITY_TOMOYO default "apparmor" if DEFAULT_SECURITY_APPARMOR - default "yama" if DEFAULT_SECURITY_YAMA default "" if DEFAULT_SECURITY_DAC endmenu diff --git a/security/security.c b/security/security.c index 595fffab48b0a2..e693ffcf9266e0 100644 --- a/security/security.c +++ b/security/security.c @@ -56,18 +56,13 @@ int __init security_init(void) pr_info("Security Framework initialized\n"); /* - * Always load the capability module. + * Load minor LSMs, with the capability module always first. */ capability_add_hooks(); -#ifdef CONFIG_SECURITY_YAMA_STACKED - /* - * If Yama is configured for stacking load it next. - */ yama_add_hooks(); -#endif + /* - * Load the chosen module if there is one. - * This will also find yama if it is stacking + * Load all the remaining security modules. 
*/ do_security_initcalls(); diff --git a/security/yama/Kconfig b/security/yama/Kconfig index 3123e1da2fedb0..90c605eea89215 100644 --- a/security/yama/Kconfig +++ b/security/yama/Kconfig @@ -6,14 +6,7 @@ config SECURITY_YAMA This selects Yama, which extends DAC support with additional system-wide security settings beyond regular Linux discretionary access controls. Currently available is ptrace scope restriction. + Like capabilities, this security module stacks with other LSMs. Further information can be found in Documentation/security/Yama.txt. If you are unsure how to answer this question, answer N. - -config SECURITY_YAMA_STACKED - bool "Yama stacked with other LSMs" - depends on SECURITY_YAMA - default n - help - When Yama is built into the kernel, force it to stack with the - selected primary LSM. diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 9ed32502470e9b..d3c19c970a06bf 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -353,11 +353,6 @@ static struct security_hook_list yama_hooks[] = { LSM_HOOK_INIT(task_free, yama_task_free), }; -void __init yama_add_hooks(void) -{ - security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks)); -} - #ifdef CONFIG_SYSCTL static int yama_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -396,25 +391,18 @@ static struct ctl_table yama_sysctl_table[] = { }, { } }; -#endif /* CONFIG_SYSCTL */ - -static __init int yama_init(void) +static void __init yama_init_sysctl(void) { -#ifndef CONFIG_SECURITY_YAMA_STACKED - /* - * If yama is being stacked this is already taken care of. - */ - if (!security_module_enable("yama")) - return 0; -#endif - pr_info("Yama: becoming mindful.\n"); - -#ifdef CONFIG_SYSCTL if (!register_sysctl_paths(yama_sysctl_path, yama_sysctl_table)) panic("Yama: sysctl registration failed.\n"); -#endif - - return 0; } +#else +static inline void yama_init_sysctl(void) { } +#endif /* CONFIG_SYSCTL */ -security_initcall(yama_init); +void __init yama_add_hooks(void) +{ + pr_info("Yama: becoming mindful.\n"); + security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks)); + yama_init_sysctl(); +} From 21abb1ec414c75abe32c3854848ff30e2b4a6113 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 22 Jul 2015 14:25:31 -0700 Subject: [PATCH 063/734] Smack: IPv6 host labeling IPv6 appears to be (finally) coming of age with the influx of autonomous devices. In support of this, add the ability to associate a Smack label with IPv6 addresses. This patch also cleans up some of the conditional compilation associated with the introduction of secmark processing. It's now more obvious which bit of code goes with which feature. Signed-off-by: Casey Schaufler --- Documentation/security/Smack.txt | 27 +- security/smack/smack.h | 48 +++- security/smack/smack_lsm.c | 262 +++++++++++++------ security/smack/smackfs.c | 428 +++++++++++++++++++++++++------ 4 files changed, 604 insertions(+), 161 deletions(-) diff --git a/Documentation/security/Smack.txt b/Documentation/security/Smack.txt index de5e1aeca7fb95..5e6d07fbed07c5 100644 --- a/Documentation/security/Smack.txt +++ b/Documentation/security/Smack.txt @@ -28,6 +28,10 @@ Smack kernels use the CIPSO IP option. Some network configurations are intolerant of IP options and can impede access to systems that use them as Smack does. +Smack is used in the Tizen operating system. Please +go to http://wiki.tizen.org for information about how +Smack is used in Tizen. 
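The lsm_hooks.h hunk in the Yama patch above uses the standard pattern
for optional minor-LSM hooks: pair the real declaration with an empty
inline stub so that callers need no #ifdef. A minimal sketch of the
pattern, with a hypothetical minor LSM "foo" standing in for Yama (the
names are illustrative, not from the patch):

    /* Hypothetical minor LSM "foo", mirroring the Yama hunk above. */
    #ifdef CONFIG_SECURITY_FOO
    extern void __init foo_add_hooks(void);
    #else
    static inline void __init foo_add_hooks(void) { }
    #endif

    /*
     * security_init() can then call foo_add_hooks() unconditionally;
     * when CONFIG_SECURITY_FOO is off, the empty stub compiles away.
     */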
+ The current git repository for Smack user space is: git://github.com/smack-team/smack.git @@ -108,6 +112,8 @@ in the smackfs filesystem. This pseudo-filesystem is mounted on /sys/fs/smackfs. access + Provided for backward compatibility. The access2 interface + is preferred and should be used instead. This interface reports whether a subject with the specified Smack label has a particular access to an object with a specified Smack label. Write a fixed format access rule to @@ -136,6 +142,8 @@ change-rule those in the fourth string. If there is no such rule it will be created using the access specified in the third and the fourth strings. cipso + Provided for backward compatibility. The cipso2 interface + is preferred and should be used instead. This interface allows a specific CIPSO header to be assigned to a Smack label. The format accepted on write is: "%24s%4d%4d"["%4d"]... @@ -157,7 +165,19 @@ direct doi This contains the CIPSO domain of interpretation used in network packets. +ipv6host + This interface allows specific IPv6 internet addresses to be + treated as single label hosts. Packets are sent to single + label hosts only from processes that have Smack write access + to the host label. All packets received from single label hosts + are given the specified label. The format accepted on write is: + "%h:%h:%h:%h:%h:%h:%h:%h label" or + "%h:%h:%h:%h:%h:%h:%h:%h/%d label". + The "::" address shortcut is not supported. + If label is "-DELETE" a matched entry will be deleted. load + Provided for backward compatibility. The load2 interface + is preferred and should be used instead. This interface allows access control rules in addition to the system defined rules to be specified. The format accepted on write is: @@ -181,6 +201,8 @@ load2 permissions that are not allowed. The string "r-x--" would specify read and execute access. load-self + Provided for backward compatibility. The load-self2 interface + is preferred and should be used instead. This interface allows process specific access rules to be defined. These rules are only consulted if access would otherwise be permitted, and are intended to provide additional @@ -205,6 +227,8 @@ netlabel received from single label hosts are given the specified label. The format accepted on write is: "%d.%d.%d.%d label" or "%d.%d.%d.%d/%d label". + If the label specified is "-CIPSO" the address is treated + as a host that supports CIPSO headers. onlycap This contains labels processes must have for CAP_MAC_ADMIN and CAP_MAC_OVERRIDE to be effective. If this file is empty @@ -232,7 +256,8 @@ unconfined is dangerous and can ruin the proper labeling of your system. It should never be used in production. -You can add access rules in /etc/smack/accesses. They take the form: +If you are using the smackload utility +you can add access rules in /etc/smack/accesses. They take the form: subjectlabel objectlabel access diff --git a/security/smack/smack.h b/security/smack/smack.h index 69ab9eb7d6d927..fff0c612bbb77b 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -17,11 +17,26 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif /* CONFIG_IPV6 */ #include #include #include #include +/* + * Use IPv6 port labeling if IPv6 is enabled and secmarks + * are not being used. 
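+ * When CONFIG_SECURITY_SMACK_NETFILTER is enabled, IPv6 checks are
+ * driven by the packet secmark instead; see the
+ * SMACK_IPV6_SECMARK_LABELING block below.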
+ */ +#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) +#define SMACK_IPV6_PORT_LABELING 1 +#endif + +#if IS_ENABLED(CONFIG_IPV6) && defined(CONFIG_SECURITY_SMACK_NETFILTER) +#define SMACK_IPV6_SECMARK_LABELING 1 +#endif + /* * Smack labels were limited to 23 characters for a long time. */ @@ -118,15 +133,30 @@ struct smack_rule { }; /* - * An entry in the table identifying hosts. + * An entry in the table identifying IPv4 hosts. */ -struct smk_netlbladdr { +struct smk_net4addr { struct list_head list; - struct sockaddr_in smk_host; /* network address */ + struct in_addr smk_host; /* network address */ struct in_addr smk_mask; /* network mask */ + int smk_masks; /* mask size */ + struct smack_known *smk_label; /* label */ +}; + +#if IS_ENABLED(CONFIG_IPV6) +/* + * An entry in the table identifying IPv6 hosts. + */ +struct smk_net6addr { + struct list_head list; + struct in6_addr smk_host; /* network address */ + struct in6_addr smk_mask; /* network mask */ + int smk_masks; /* mask size */ struct smack_known *smk_label; /* label */ }; +#endif /* CONFIG_IPV6 */ +#ifdef SMACK_IPV6_PORT_LABELING /* * An entry in the table identifying ports. */ @@ -137,6 +167,7 @@ struct smk_port_label { struct smack_known *smk_in; /* inbound label */ struct smack_known *smk_out; /* outgoing label */ }; +#endif /* SMACK_IPV6_PORT_LABELING */ struct smack_onlycap { struct list_head list; @@ -170,6 +201,7 @@ enum { #define SMK_FSROOT "smackfsroot=" #define SMK_FSTRANS "smackfstransmute=" +#define SMACK_DELETE_OPTION "-DELETE" #define SMACK_CIPSO_OPTION "-CIPSO" /* @@ -252,10 +284,6 @@ struct smk_audit_info { struct smack_audit_data sad; #endif }; -/* - * These functions are in smack_lsm.c - */ -struct inode_smack *new_inode_smack(struct smack_known *); /* * These functions are in smack_access.c @@ -285,7 +313,6 @@ extern struct smack_known *smack_syslog_label; #ifdef CONFIG_SECURITY_SMACK_BRINGUP extern struct smack_known *smack_unconfined; #endif -extern struct smack_known smack_cipso_option; extern int smack_ptrace_rule; extern struct smack_known smack_known_floor; @@ -297,7 +324,10 @@ extern struct smack_known smack_known_web; extern struct mutex smack_known_lock; extern struct list_head smack_known_list; -extern struct list_head smk_netlbladdr_list; +extern struct list_head smk_net4addr_list; +#if IS_ENABLED(CONFIG_IPV6) +extern struct list_head smk_net6addr_list; +#endif /* CONFIG_IPV6 */ extern struct mutex smack_onlycap_lock; extern struct list_head smack_onlycap_list; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index d962f887d3f445..cc390bccecd775 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -51,9 +51,9 @@ #define SMK_RECEIVING 1 #define SMK_SENDING 2 -#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) +#ifdef SMACK_IPV6_PORT_LABELING LIST_HEAD(smk_ipv6_port_list); -#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ +#endif static struct kmem_cache *smack_inode_cache; int smack_enabled; @@ -2272,7 +2272,7 @@ static void smack_sk_free_security(struct sock *sk) } /** -* smack_host_label - check host based restrictions +* smack_ipv4host_label - check host based restrictions * @sip: the object end * * looks for host based access restrictions @@ -2283,30 +2283,96 @@ static void smack_sk_free_security(struct sock *sk) * * Returns the label of the far end or NULL if it's not special. 
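*
* For example, with "192.168.0.0/16 Rubble" written to /smack/netlabel
* ("Rubble" being an arbitrary example label), packets addressed to
* 192.168.1.1 are checked against the "Rubble" label.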
*/ -static struct smack_known *smack_host_label(struct sockaddr_in *sip) +static struct smack_known *smack_ipv4host_label(struct sockaddr_in *sip) { - struct smk_netlbladdr *snp; + struct smk_net4addr *snp; struct in_addr *siap = &sip->sin_addr; if (siap->s_addr == 0) return NULL; - list_for_each_entry_rcu(snp, &smk_netlbladdr_list, list) + list_for_each_entry_rcu(snp, &smk_net4addr_list, list) + /* + * we break after finding the first match because + * the list is sorted from longest to shortest mask + * so we have found the most specific match + */ + if (snp->smk_host.s_addr == + (siap->s_addr & snp->smk_mask.s_addr)) + return snp->smk_label; + + return NULL; +} + +#if IS_ENABLED(CONFIG_IPV6) +/* + * smk_ipv6_localhost - Check for local ipv6 host address + * @sip: the address + * + * Returns boolean true if this is the localhost address + */ +static bool smk_ipv6_localhost(struct sockaddr_in6 *sip) +{ + __be16 *be16p = (__be16 *)&sip->sin6_addr; + __be32 *be32p = (__be32 *)&sip->sin6_addr; + + if (be32p[0] == 0 && be32p[1] == 0 && be32p[2] == 0 && be16p[6] == 0 && + ntohs(be16p[7]) == 1) + return true; + return false; +} + +/** +* smack_ipv6host_label - check host based restrictions +* @sip: the object end +* +* looks for host based access restrictions +* +* This version will only be appropriate for really small sets of single label +* hosts. The caller is responsible for ensuring that the RCU read lock is +* taken before calling this function. +* +* Returns the label of the far end or NULL if it's not special. +*/ +static struct smack_known *smack_ipv6host_label(struct sockaddr_in6 *sip) +{ + struct smk_net6addr *snp; + struct in6_addr *sap = &sip->sin6_addr; + int i; + int found = 0; + + /* + * It's local. Don't look for a host label. + */ + if (smk_ipv6_localhost(sip)) + return NULL; + + list_for_each_entry_rcu(snp, &smk_net6addr_list, list) { /* * we break after finding the first match because * the list is sorted from longest to shortest mask * so we have found the most specific match */ - if ((&snp->smk_host.sin_addr)->s_addr == - (siap->s_addr & (&snp->smk_mask)->s_addr)) { - /* we have found the special CIPSO option */ - if (snp->smk_label == &smack_cipso_option) - return NULL; - return snp->smk_label; + for (found = 1, i = 0; i < 8; i++) { + /* + * If the label is NULL the entry has + * been renounced. Ignore it. 
+ */ + if (snp->smk_label == NULL) + continue; + if ((sap->s6_addr16[i] & snp->smk_mask.s6_addr16[i]) != + snp->smk_host.s6_addr16[i]) { + found = 0; + break; + } } + if (found) + return snp->smk_label; + } return NULL; } +#endif /* CONFIG_IPV6 */ /** * smack_netlabel - Set the secattr on a socket @@ -2370,7 +2436,7 @@ static int smack_netlabel_send(struct sock *sk, struct sockaddr_in *sap) struct smk_audit_info ad; rcu_read_lock(); - hkp = smack_host_label(sap); + hkp = smack_ipv4host_label(sap); if (hkp != NULL) { #ifdef CONFIG_AUDIT struct lsm_network_audit net; @@ -2395,7 +2461,42 @@ static int smack_netlabel_send(struct sock *sk, struct sockaddr_in *sap) return smack_netlabel(sk, sk_lbl); } -#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) +#if IS_ENABLED(CONFIG_IPV6) +/** + * smk_ipv6_check - check Smack access + * @subject: subject Smack label + * @object: object Smack label + * @address: address + * @act: the action being taken + * + * Check an IPv6 access + */ +static int smk_ipv6_check(struct smack_known *subject, + struct smack_known *object, + struct sockaddr_in6 *address, int act) +{ +#ifdef CONFIG_AUDIT + struct lsm_network_audit net; +#endif + struct smk_audit_info ad; + int rc; + +#ifdef CONFIG_AUDIT + smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); + ad.a.u.net->family = PF_INET6; + ad.a.u.net->dport = ntohs(address->sin6_port); + if (act == SMK_RECEIVING) + ad.a.u.net->v6info.saddr = address->sin6_addr; + else + ad.a.u.net->v6info.daddr = address->sin6_addr; +#endif + rc = smk_access(subject, object, MAY_WRITE, &ad); + rc = smk_bu_note("IPv6 check", subject, object, MAY_WRITE, rc); + return rc; +} +#endif /* CONFIG_IPV6 */ + +#ifdef SMACK_IPV6_PORT_LABELING /** * smk_ipv6_port_label - Smack port access table management * @sock: socket @@ -2479,48 +2580,43 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address) static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address, int act) { - __be16 *bep; - __be32 *be32p; struct smk_port_label *spp; struct socket_smack *ssp = sk->sk_security; - struct smack_known *skp; - unsigned short port = 0; + struct smack_known *skp = NULL; + unsigned short port; struct smack_known *object; - struct smk_audit_info ad; - int rc; -#ifdef CONFIG_AUDIT - struct lsm_network_audit net; -#endif if (act == SMK_RECEIVING) { - skp = smack_net_ambient; + skp = smack_ipv6host_label(address); object = ssp->smk_in; } else { skp = ssp->smk_out; - object = smack_net_ambient; + object = smack_ipv6host_label(address); } /* - * Get the IP address and port from the address. + * The other end is a single label host. */ - port = ntohs(address->sin6_port); - bep = (__be16 *)(&address->sin6_addr); - be32p = (__be32 *)(&address->sin6_addr); + if (skp != NULL && object != NULL) + return smk_ipv6_check(skp, object, address, act); + if (skp == NULL) + skp = smack_net_ambient; + if (object == NULL) + object = smack_net_ambient; /* * It's remote, so port lookup does no good. */ - if (be32p[0] || be32p[1] || be32p[2] || bep[6] || ntohs(bep[7]) != 1) - goto auditout; + if (!smk_ipv6_localhost(address)) + return smk_ipv6_check(skp, object, address, act); /* * It's local so the send check has to have passed. 
*/ - if (act == SMK_RECEIVING) { - skp = &smack_known_web; - goto auditout; - } + if (act == SMK_RECEIVING) + return 0; + port = ntohs(address->sin6_port); list_for_each_entry(spp, &smk_ipv6_port_list, list) { if (spp->smk_port != port) continue; @@ -2530,22 +2626,9 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address, break; } -auditout: - -#ifdef CONFIG_AUDIT - smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); - ad.a.u.net->family = sk->sk_family; - ad.a.u.net->dport = port; - if (act == SMK_RECEIVING) - ad.a.u.net->v6info.saddr = address->sin6_addr; - else - ad.a.u.net->v6info.daddr = address->sin6_addr; -#endif - rc = smk_access(skp, object, MAY_WRITE, &ad); - rc = smk_bu_note("IPv6 port check", skp, object, MAY_WRITE, rc); - return rc; + return smk_ipv6_check(skp, object, address, act); } -#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ +#endif /* SMACK_IPV6_PORT_LABELING */ /** * smack_inode_setsecurity - set smack xattrs @@ -2606,10 +2689,10 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name, } else return -EOPNOTSUPP; -#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) +#ifdef SMACK_IPV6_PORT_LABELING if (sock->sk->sk_family == PF_INET6) smk_ipv6_port_label(sock, NULL); -#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ +#endif return 0; } @@ -2651,7 +2734,7 @@ static int smack_socket_post_create(struct socket *sock, int family, return smack_netlabel(sock->sk, SMACK_CIPSO_SOCKET); } -#ifndef CONFIG_SECURITY_SMACK_NETFILTER +#ifdef SMACK_IPV6_PORT_LABELING /** * smack_socket_bind - record port binding information. * @sock: the socket @@ -2665,14 +2748,11 @@ static int smack_socket_post_create(struct socket *sock, int family, static int smack_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { -#if IS_ENABLED(CONFIG_IPV6) if (sock->sk != NULL && sock->sk->sk_family == PF_INET6) smk_ipv6_port_label(sock, address); -#endif - return 0; } -#endif /* !CONFIG_SECURITY_SMACK_NETFILTER */ +#endif /* SMACK_IPV6_PORT_LABELING */ /** * smack_socket_connect - connect access check @@ -2688,6 +2768,13 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, int addrlen) { int rc = 0; +#if IS_ENABLED(CONFIG_IPV6) + struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; +#endif +#ifdef SMACK_IPV6_SECMARK_LABELING + struct smack_known *rsp; + struct socket_smack *ssp = sock->sk->sk_security; +#endif if (sock->sk == NULL) return 0; @@ -2701,10 +2788,15 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, case PF_INET6: if (addrlen < sizeof(struct sockaddr_in6)) return -EINVAL; -#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) - rc = smk_ipv6_port_check(sock->sk, (struct sockaddr_in6 *)sap, +#ifdef SMACK_IPV6_SECMARK_LABELING + rsp = smack_ipv6host_label(sip); + if (rsp != NULL) + rc = smk_ipv6_check(ssp->smk_out, rsp, sip, SMK_CONNECTING); -#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ +#endif +#ifdef SMACK_IPV6_PORT_LABELING + rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); +#endif break; } return rc; @@ -3590,9 +3682,13 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { struct sockaddr_in *sip = (struct sockaddr_in *) msg->msg_name; -#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) +#if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 *sap = (struct sockaddr_in6 *) msg->msg_name; -#endif /* CONFIG_IPV6 && 
!CONFIG_SECURITY_SMACK_NETFILTER */ +#endif +#ifdef SMACK_IPV6_SECMARK_LABELING + struct socket_smack *ssp = sock->sk->sk_security; + struct smack_known *rsp; +#endif int rc = 0; /* @@ -3606,9 +3702,15 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg, rc = smack_netlabel_send(sock->sk, sip); break; case AF_INET6: -#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) +#ifdef SMACK_IPV6_SECMARK_LABELING + rsp = smack_ipv6host_label(sap); + if (rsp != NULL) + rc = smk_ipv6_check(ssp->smk_out, rsp, sap, + SMK_CONNECTING); +#endif +#ifdef SMACK_IPV6_PORT_LABELING rc = smk_ipv6_port_check(sock->sk, sap, SMK_SENDING); -#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ +#endif break; } return rc; @@ -3822,10 +3924,12 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) proto = smk_skb_to_addr_ipv6(skb, &sadd); if (proto != IPPROTO_UDP && proto != IPPROTO_TCP) break; -#ifdef CONFIG_SECURITY_SMACK_NETFILTER +#ifdef SMACK_IPV6_SECMARK_LABELING if (skb && skb->secmark != 0) skp = smack_from_secid(skb->secmark); else + skp = smack_ipv6host_label(&sadd); + if (skp == NULL) skp = smack_net_ambient; #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); @@ -3836,9 +3940,10 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = smk_access(skp, ssp->smk_in, MAY_WRITE, &ad); rc = smk_bu_note("IPv6 delivery", skp, ssp->smk_in, MAY_WRITE, rc); -#else /* CONFIG_SECURITY_SMACK_NETFILTER */ +#endif /* SMACK_IPV6_SECMARK_LABELING */ +#ifdef SMACK_IPV6_PORT_LABELING rc = smk_ipv6_port_check(sk, &sadd, SMK_RECEIVING); -#endif /* CONFIG_SECURITY_SMACK_NETFILTER */ +#endif /* SMACK_IPV6_PORT_LABELING */ break; #endif /* CONFIG_IPV6 */ } @@ -3936,13 +4041,11 @@ static int smack_socket_getpeersec_dgram(struct socket *sock, } netlbl_secattr_destroy(&secattr); break; -#if IS_ENABLED(CONFIG_IPV6) case PF_INET6: -#ifdef CONFIG_SECURITY_SMACK_NETFILTER +#ifdef SMACK_IPV6_SECMARK_LABELING s = skb->secmark; -#endif /* CONFIG_SECURITY_SMACK_NETFILTER */ +#endif break; -#endif /* CONFIG_IPV6 */ } *secid = s; if (s == 0) @@ -4065,7 +4168,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, hdr = ip_hdr(skb); addr.sin_addr.s_addr = hdr->saddr; rcu_read_lock(); - hskp = smack_host_label(&addr); + hskp = smack_ipv4host_label(&addr); rcu_read_unlock(); if (hskp == NULL) @@ -4517,9 +4620,9 @@ struct security_hook_list smack_hooks[] = { LSM_HOOK_INIT(unix_may_send, smack_unix_may_send), LSM_HOOK_INIT(socket_post_create, smack_socket_post_create), -#ifndef CONFIG_SECURITY_SMACK_NETFILTER +#ifdef SMACK_IPV6_PORT_LABELING LSM_HOOK_INIT(socket_bind, smack_socket_bind), -#endif /* CONFIG_SECURITY_SMACK_NETFILTER */ +#endif LSM_HOOK_INIT(socket_connect, smack_socket_connect), LSM_HOOK_INIT(socket_sendmsg, smack_socket_sendmsg), LSM_HOOK_INIT(socket_sock_rcv_skb, smack_socket_sock_rcv_skb), @@ -4614,7 +4717,16 @@ static __init int smack_init(void) return -ENOMEM; } - printk(KERN_INFO "Smack: Initializing.\n"); + pr_info("Smack: Initializing.\n"); +#ifdef CONFIG_SECURITY_SMACK_NETFILTER + pr_info("Smack: Netfilter enabled.\n"); +#endif +#ifdef SMACK_IPV6_PORT_LABELING + pr_info("Smack: IPv6 port labeling enabled.\n"); +#endif +#ifdef SMACK_IPV6_SECMARK_LABELING + pr_info("Smack: IPv6 Netfilter enabled.\n"); +#endif /* * Set the security state for the initial task. 
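Before the smackfs changes below, a hedged usage sketch of the new
interface: with smackfs mounted on /sys/fs/smackfs, a process holding
CAP_MAC_ADMIN could load a single-label IPv6 host rule roughly as
follows. The full eight-group address form is required (the "::"
shortcut is not accepted), and "Rubble" is an arbitrary example label.

    /* Sketch only: load one IPv6 single-label host rule via smackfs. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            static const char rule[] = "2001:db8:0:0:0:0:0:0/64 Rubble\n";
            int fd = open("/sys/fs/smackfs/ipv6host", O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, rule, strlen(rule)) != (ssize_t)strlen(rule)) {
                    perror("write");
                    close(fd);
                    return 1;
            }
            close(fd);
            return 0;
    }

The smackfs hunks that implement this write path follow.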
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 81a2888a990863..11b752b366eabe 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -29,6 +29,7 @@ #include #include "smack.h" +#define BEBITS (sizeof(__be32) * 8) /* * smackfs pseudo filesystem. */ @@ -40,7 +41,7 @@ enum smk_inos { SMK_DOI = 5, /* CIPSO DOI */ SMK_DIRECT = 6, /* CIPSO level indicating direct label */ SMK_AMBIENT = 7, /* internet ambient label */ - SMK_NETLBLADDR = 8, /* single label hosts */ + SMK_NET4ADDR = 8, /* single label hosts */ SMK_ONLYCAP = 9, /* the only "capable" label */ SMK_LOGGING = 10, /* logging */ SMK_LOAD_SELF = 11, /* task specific rules */ @@ -57,6 +58,9 @@ enum smk_inos { #ifdef CONFIG_SECURITY_SMACK_BRINGUP SMK_UNCONFINED = 22, /* define an unconfined label */ #endif +#if IS_ENABLED(CONFIG_IPV6) + SMK_NET6ADDR = 23, /* single label IPv6 hosts */ +#endif /* CONFIG_IPV6 */ }; /* @@ -64,7 +68,10 @@ enum smk_inos { */ static DEFINE_MUTEX(smack_cipso_lock); static DEFINE_MUTEX(smack_ambient_lock); -static DEFINE_MUTEX(smk_netlbladdr_lock); +static DEFINE_MUTEX(smk_net4addr_lock); +#if IS_ENABLED(CONFIG_IPV6) +static DEFINE_MUTEX(smk_net6addr_lock); +#endif /* CONFIG_IPV6 */ /* * This is the "ambient" label for network traffic. @@ -118,7 +125,10 @@ int smack_ptrace_rule = SMACK_PTRACE_DEFAULT; * can write to the specified label. */ -LIST_HEAD(smk_netlbladdr_list); +LIST_HEAD(smk_net4addr_list); +#if IS_ENABLED(CONFIG_IPV6) +LIST_HEAD(smk_net6addr_list); +#endif /* CONFIG_IPV6 */ /* * Rule lists are maintained for each label. @@ -140,11 +150,6 @@ struct smack_parsed_rule { static int smk_cipso_doi_value = SMACK_CIPSO_DOI_DEFAULT; -struct smack_known smack_cipso_option = { - .smk_known = SMACK_CIPSO_OPTION, - .smk_secid = 0, -}; - /* * Values for parsing cipso rules * SMK_DIGITLEN: Length of a digit field in a rule. 
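The next hunks convert the IPv4 host list to struct smk_net4addr and
keep it ordered from longest to shortest mask, so the first match
during lookup is the most specific one. A minimal standalone sketch of
that invariant, using made-up addresses and labels and plain arrays
instead of the kernel's RCU lists:

    #include <stdint.h>
    #include <stdio.h>

    struct host_rule {
            uint32_t host;          /* host byte order, for simplicity */
            uint32_t mask;
            const char *label;
    };

    /* Ordered longest mask first, as smk_net4addr_insert() maintains. */
    static const struct host_rule rules[] = {
            { 0xc0a80100, 0xffffff00, "Pebble" },   /* 192.168.1.0/24 */
            { 0xc0a80000, 0xffff0000, "Rubble" },   /* 192.168.0.0/16 */
    };

    static const char *lookup(uint32_t addr)
    {
            unsigned int i;

            for (i = 0; i < sizeof(rules) / sizeof(rules[0]); i++)
                    if ((addr & rules[i].mask) == rules[i].host)
                            return rules[i].label;  /* first hit wins */
            return NULL;
    }

    int main(void)
    {
            /* 192.168.1.7 matches the /24 entry before the /16 one. */
            printf("%s\n", lookup(0xc0a80107));     /* prints "Pebble" */
            return 0;
    }

Reversing the two entries would let the broader /16 shadow the /24;
the sorted insertion below is what prevents that.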
@@ -1047,92 +1052,90 @@ static const struct file_operations smk_cipso2_ops = { * Seq_file read operations for /smack/netlabel */ -static void *netlbladdr_seq_start(struct seq_file *s, loff_t *pos) +static void *net4addr_seq_start(struct seq_file *s, loff_t *pos) { - return smk_seq_start(s, pos, &smk_netlbladdr_list); + return smk_seq_start(s, pos, &smk_net4addr_list); } -static void *netlbladdr_seq_next(struct seq_file *s, void *v, loff_t *pos) +static void *net4addr_seq_next(struct seq_file *s, void *v, loff_t *pos) { - return smk_seq_next(s, v, pos, &smk_netlbladdr_list); + return smk_seq_next(s, v, pos, &smk_net4addr_list); } -#define BEBITS (sizeof(__be32) * 8) /* * Print host/label pairs */ -static int netlbladdr_seq_show(struct seq_file *s, void *v) +static int net4addr_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; - struct smk_netlbladdr *skp = - list_entry_rcu(list, struct smk_netlbladdr, list); - unsigned char *hp = (char *) &skp->smk_host.sin_addr.s_addr; - int maskn; - u32 temp_mask = be32_to_cpu(skp->smk_mask.s_addr); - - for (maskn = 0; temp_mask; temp_mask <<= 1, maskn++); + struct smk_net4addr *skp = + list_entry_rcu(list, struct smk_net4addr, list); + char *kp = SMACK_CIPSO_OPTION; - seq_printf(s, "%u.%u.%u.%u/%d %s\n", - hp[0], hp[1], hp[2], hp[3], maskn, skp->smk_label->smk_known); + if (skp->smk_label != NULL) + kp = skp->smk_label->smk_known; + seq_printf(s, "%pI4/%d %s\n", &skp->smk_host.s_addr, + skp->smk_masks, kp); return 0; } -static const struct seq_operations netlbladdr_seq_ops = { - .start = netlbladdr_seq_start, - .next = netlbladdr_seq_next, - .show = netlbladdr_seq_show, +static const struct seq_operations net4addr_seq_ops = { + .start = net4addr_seq_start, + .next = net4addr_seq_next, + .show = net4addr_seq_show, .stop = smk_seq_stop, }; /** - * smk_open_netlbladdr - open() for /smack/netlabel + * smk_open_net4addr - open() for /smack/netlabel * @inode: inode structure representing file * @file: "netlabel" file pointer * - * Connect our netlbladdr_seq_* operations with /smack/netlabel + * Connect our net4addr_seq_* operations with /smack/netlabel * file_operations */ -static int smk_open_netlbladdr(struct inode *inode, struct file *file) +static int smk_open_net4addr(struct inode *inode, struct file *file) { - return seq_open(file, &netlbladdr_seq_ops); + return seq_open(file, &net4addr_seq_ops); } /** - * smk_netlbladdr_insert + * smk_net4addr_insert * @new : netlabel to insert * - * This helper insert netlabel in the smack_netlbladdrs list + * This helper insert netlabel in the smack_net4addrs list * sorted by netmask length (longest to smallest) - * locked by &smk_netlbladdr_lock in smk_write_netlbladdr + * locked by &smk_net4addr_lock in smk_write_net4addr * */ -static void smk_netlbladdr_insert(struct smk_netlbladdr *new) +static void smk_net4addr_insert(struct smk_net4addr *new) { - struct smk_netlbladdr *m, *m_next; + struct smk_net4addr *m; + struct smk_net4addr *m_next; - if (list_empty(&smk_netlbladdr_list)) { - list_add_rcu(&new->list, &smk_netlbladdr_list); + if (list_empty(&smk_net4addr_list)) { + list_add_rcu(&new->list, &smk_net4addr_list); return; } - m = list_entry_rcu(smk_netlbladdr_list.next, - struct smk_netlbladdr, list); + m = list_entry_rcu(smk_net4addr_list.next, + struct smk_net4addr, list); /* the comparison '>' is a bit hacky, but works */ - if (new->smk_mask.s_addr > m->smk_mask.s_addr) { - list_add_rcu(&new->list, &smk_netlbladdr_list); + if (new->smk_masks > m->smk_masks) { + list_add_rcu(&new->list, 
&smk_net4addr_list); return; } - list_for_each_entry_rcu(m, &smk_netlbladdr_list, list) { - if (list_is_last(&m->list, &smk_netlbladdr_list)) { + list_for_each_entry_rcu(m, &smk_net4addr_list, list) { + if (list_is_last(&m->list, &smk_net4addr_list)) { list_add_rcu(&new->list, &m->list); return; } m_next = list_entry_rcu(m->list.next, - struct smk_netlbladdr, list); - if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) { + struct smk_net4addr, list); + if (new->smk_masks > m_next->smk_masks) { list_add_rcu(&new->list, &m->list); return; } @@ -1141,28 +1144,29 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) /** - * smk_write_netlbladdr - write() for /smack/netlabel + * smk_write_net4addr - write() for /smack/netlabel * @file: file pointer, not actually used * @buf: where to get the data from * @count: bytes sent * @ppos: where to start * - * Accepts only one netlbladdr per write call. + * Accepts only one net4addr per write call. * Returns number of bytes written or error code, as appropriate */ -static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, +static ssize_t smk_write_net4addr(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct smk_netlbladdr *snp; + struct smk_net4addr *snp; struct sockaddr_in newname; char *smack; - struct smack_known *skp; + struct smack_known *skp = NULL; char *data; char *host = (char *)&newname.sin_addr.s_addr; int rc; struct netlbl_audit audit_info; struct in_addr mask; unsigned int m; + unsigned int masks; int found; u32 mask_bits = (1<<31); __be32 nsa; @@ -1200,7 +1204,7 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, data[count] = '\0'; rc = sscanf(data, "%hhd.%hhd.%hhd.%hhd/%u %s", - &host[0], &host[1], &host[2], &host[3], &m, smack); + &host[0], &host[1], &host[2], &host[3], &masks, smack); if (rc != 6) { rc = sscanf(data, "%hhd.%hhd.%hhd.%hhd %s", &host[0], &host[1], &host[2], &host[3], smack); @@ -1209,8 +1213,9 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, goto free_out; } m = BEBITS; + masks = 32; } - if (m > BEBITS) { + if (masks > BEBITS) { rc = -EINVAL; goto free_out; } @@ -1225,16 +1230,16 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, goto free_out; } } else { - /* check known options */ - if (strcmp(smack, smack_cipso_option.smk_known) == 0) - skp = &smack_cipso_option; - else { + /* + * Only the -CIPSO option is supported for IPv4 + */ + if (strcmp(smack, SMACK_CIPSO_OPTION) != 0) { rc = -EINVAL; goto free_out; } } - for (temp_mask = 0; m > 0; m--) { + for (m = masks, temp_mask = 0; m > 0; m--) { temp_mask |= mask_bits; mask_bits >>= 1; } @@ -1245,14 +1250,13 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, * Only allow one writer at a time. Writes should be * quite rare and small in any case. 
*/ - mutex_lock(&smk_netlbladdr_lock); + mutex_lock(&smk_net4addr_lock); nsa = newname.sin_addr.s_addr; /* try to find if the prefix is already in the list */ found = 0; - list_for_each_entry_rcu(snp, &smk_netlbladdr_list, list) { - if (snp->smk_host.sin_addr.s_addr == nsa && - snp->smk_mask.s_addr == mask.s_addr) { + list_for_each_entry_rcu(snp, &smk_net4addr_list, list) { + if (snp->smk_host.s_addr == nsa && snp->smk_masks == masks) { found = 1; break; } @@ -1265,17 +1269,20 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, rc = -ENOMEM; else { rc = 0; - snp->smk_host.sin_addr.s_addr = newname.sin_addr.s_addr; + snp->smk_host.s_addr = newname.sin_addr.s_addr; snp->smk_mask.s_addr = mask.s_addr; snp->smk_label = skp; - smk_netlbladdr_insert(snp); + snp->smk_masks = masks; + smk_net4addr_insert(snp); } } else { - /* we delete the unlabeled entry, only if the previous label - * wasn't the special CIPSO option */ - if (snp->smk_label != &smack_cipso_option) + /* + * Delete the unlabeled entry, only if the previous label + * wasn't the special CIPSO option + */ + if (snp->smk_label != NULL) rc = netlbl_cfg_unlbl_static_del(&init_net, NULL, - &snp->smk_host.sin_addr, &snp->smk_mask, + &snp->smk_host, &snp->smk_mask, PF_INET, &audit_info); else rc = 0; @@ -1287,15 +1294,279 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, * this host so that incoming packets get labeled. * but only if we didn't get the special CIPSO option */ - if (rc == 0 && skp != &smack_cipso_option) + if (rc == 0 && skp != NULL) rc = netlbl_cfg_unlbl_static_add(&init_net, NULL, - &snp->smk_host.sin_addr, &snp->smk_mask, PF_INET, + &snp->smk_host, &snp->smk_mask, PF_INET, snp->smk_label->smk_secid, &audit_info); if (rc == 0) rc = count; - mutex_unlock(&smk_netlbladdr_lock); + mutex_unlock(&smk_net4addr_lock); + +free_out: + kfree(smack); +free_data_out: + kfree(data); + + return rc; +} + +static const struct file_operations smk_net4addr_ops = { + .open = smk_open_net4addr, + .read = seq_read, + .llseek = seq_lseek, + .write = smk_write_net4addr, + .release = seq_release, +}; + +#if IS_ENABLED(CONFIG_IPV6) +/* + * Seq_file read operations for /smack/netlabel6 + */ + +static void *net6addr_seq_start(struct seq_file *s, loff_t *pos) +{ + return smk_seq_start(s, pos, &smk_net6addr_list); +} + +static void *net6addr_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + return smk_seq_next(s, v, pos, &smk_net6addr_list); +} + +/* + * Print host/label pairs + */ +static int net6addr_seq_show(struct seq_file *s, void *v) +{ + struct list_head *list = v; + struct smk_net6addr *skp = + list_entry(list, struct smk_net6addr, list); + + if (skp->smk_label != NULL) + seq_printf(s, "%pI6/%d %s\n", &skp->smk_host, skp->smk_masks, + skp->smk_label->smk_known); + + return 0; +} + +static const struct seq_operations net6addr_seq_ops = { + .start = net6addr_seq_start, + .next = net6addr_seq_next, + .show = net6addr_seq_show, + .stop = smk_seq_stop, +}; + +/** + * smk_open_net6addr - open() for /smack/netlabel + * @inode: inode structure representing file + * @file: "netlabel" file pointer + * + * Connect our net6addr_seq_* operations with /smack/netlabel + * file_operations + */ +static int smk_open_net6addr(struct inode *inode, struct file *file) +{ + return seq_open(file, &net6addr_seq_ops); +} + +/** + * smk_net6addr_insert + * @new : entry to insert + * + * This inserts an entry in the smack_net6addrs list + * sorted by netmask length (longest to smallest) + * locked by 
&smk_net6addr_lock in smk_write_net6addr + * + */ +static void smk_net6addr_insert(struct smk_net6addr *new) +{ + struct smk_net6addr *m_next; + struct smk_net6addr *m; + + if (list_empty(&smk_net6addr_list)) { + list_add_rcu(&new->list, &smk_net6addr_list); + return; + } + + m = list_entry_rcu(smk_net6addr_list.next, + struct smk_net6addr, list); + + if (new->smk_masks > m->smk_masks) { + list_add_rcu(&new->list, &smk_net6addr_list); + return; + } + + list_for_each_entry_rcu(m, &smk_net6addr_list, list) { + if (list_is_last(&m->list, &smk_net6addr_list)) { + list_add_rcu(&new->list, &m->list); + return; + } + m_next = list_entry_rcu(m->list.next, + struct smk_net6addr, list); + if (new->smk_masks > m_next->smk_masks) { + list_add_rcu(&new->list, &m->list); + return; + } + } +} + + +/** + * smk_write_net6addr - write() for /smack/netlabel + * @file: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Accepts only one net6addr per write call. + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_net6addr(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct smk_net6addr *snp; + struct in6_addr newname; + struct in6_addr fullmask; + struct smack_known *skp = NULL; + char *smack; + char *data; + int rc = 0; + int found = 0; + int i; + unsigned int scanned[8]; + unsigned int m; + unsigned int mask = 128; + + /* + * Must have privilege. + * No partial writes. + * Enough data must be present. + * "