Discussion:
[PATCH] Revert "powerpc: Switch to relative jump labels"
Roman Bolshakov
2021-05-28 01:29:43 UTC
Permalink
This reverts commit b0b3b2c78ec075cec4721986a95abbbac8c3da4f.

Otherwise, direct kernel boot with initramfs no longer works in QEMU.
It's broken in some bizarre way because a valid initramfs is not
recognized anymore:

Found initrd at 0xc000000001f70000:0xc000000003d61d64
rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd

The issue is observed on v5.13-rc3 if the kernel is built with
defconfig, GCC 7.5.0 and GNU ld 2.32.0.

Cc: Christophe Leroy <***@csgroup.eu>
Reported-by: Anastasia Kovaleva <***@yadro.com>
Signed-off-by: Roman Bolshakov <***@yadro.com>
---
arch/powerpc/Kconfig | 1 -
arch/powerpc/include/asm/jump_label.h | 21 +++++++++++++++------
arch/powerpc/kernel/jump_label.c | 4 ++--
3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 088dd2afcfe4..59e0d55ee01d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -189,7 +189,6 @@ config PPC
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
- select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KFENCE if PPC32
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 2d5c6bec2b4f..09297ec9fa52 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -20,8 +20,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
asm_volatile_goto("1:\n\t"
"nop # arch_static_branch\n\t"
".pushsection __jump_table, \"aw\"\n\t"
- ".long 1b - ., %l[l_yes] - .\n\t"
- JUMP_ENTRY_TYPE "%c0 - .\n\t"
+ JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
".popsection \n\t"
: : "i" (&((char *)key)[branch]) : : l_yes);

@@ -35,8 +34,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
asm_volatile_goto("1:\n\t"
"b %l[l_yes] # arch_static_branch_jump\n\t"
".pushsection __jump_table, \"aw\"\n\t"
- ".long 1b - ., %l[l_yes] - .\n\t"
- JUMP_ENTRY_TYPE "%c0 - .\n\t"
+ JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
".popsection \n\t"
: : "i" (&((char *)key)[branch]) : : l_yes);

@@ -45,12 +43,23 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
return true;
}

+#ifdef CONFIG_PPC64
+typedef u64 jump_label_t;
+#else
+typedef u32 jump_label_t;
+#endif
+
+struct jump_entry {
+ jump_label_t code;
+ jump_label_t target;
+ jump_label_t key;
+};
+
#else
#define ARCH_STATIC_BRANCH(LABEL, KEY) \
1098: nop; \
.pushsection __jump_table, "aw"; \
- .long 1098b - ., LABEL - .; \
- FTR_ENTRY_LONG KEY; \
+ FTR_ENTRY_LONG 1098b, LABEL, KEY; \
.popsection
#endif

diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index ce87dc5ea23c..144858027fa3 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -11,10 +11,10 @@
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry);
+ struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code;

if (type == JUMP_LABEL_JMP)
- patch_branch(addr, jump_entry_target(entry), 0);
+ patch_branch(addr, entry->target, 0);
else
patch_instruction(addr, ppc_inst(PPC_INST_NOP));
}
--
2.31.1
Greg Kurz
2021-05-28 10:34:35 UTC
Permalink
On Fri, 28 May 2021 04:29:43 +0300
Post by Roman Bolshakov
This reverts commit b0b3b2c78ec075cec4721986a95abbbac8c3da4f.
Otherwise, direct kernel boot with initramfs no longer works in QEMU.
It's broken in some bizarre way because a valid initramfs is not
Found initrd at 0xc000000001f70000:0xc000000003d61d64
rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd
The issue is observed on v5.13-rc3 if the kernel is built with
defconfig, GCC 7.5.0 and GNU ld 2.32.0.
---
I'm observing the very same issue and reverting the offending commit
fixes it indeed. Until someone has investigated the root cause, this
looks like a reasonable bug fix to me.

Reviewed-by: Greg Kurz <***@kaod.org>

and
Post by Roman Bolshakov
arch/powerpc/Kconfig | 1 -
arch/powerpc/include/asm/jump_label.h | 21 +++++++++++++++------
arch/powerpc/kernel/jump_label.c | 4 ++--
3 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 088dd2afcfe4..59e0d55ee01d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -189,7 +189,6 @@ config PPC
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
- select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KFENCE if PPC32
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 2d5c6bec2b4f..09297ec9fa52 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -20,8 +20,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
asm_volatile_goto("1:\n\t"
"nop # arch_static_branch\n\t"
".pushsection __jump_table, \"aw\"\n\t"
- ".long 1b - ., %l[l_yes] - .\n\t"
- JUMP_ENTRY_TYPE "%c0 - .\n\t"
+ JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
".popsection \n\t"
: : "i" (&((char *)key)[branch]) : : l_yes);
@@ -35,8 +34,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
asm_volatile_goto("1:\n\t"
"b %l[l_yes] # arch_static_branch_jump\n\t"
".pushsection __jump_table, \"aw\"\n\t"
- ".long 1b - ., %l[l_yes] - .\n\t"
- JUMP_ENTRY_TYPE "%c0 - .\n\t"
+ JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
".popsection \n\t"
: : "i" (&((char *)key)[branch]) : : l_yes);
@@ -45,12 +43,23 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
return true;
}
+#ifdef CONFIG_PPC64
+typedef u64 jump_label_t;
+#else
+typedef u32 jump_label_t;
+#endif
+
+struct jump_entry {
+ jump_label_t code;
+ jump_label_t target;
+ jump_label_t key;
+};
+
#else
#define ARCH_STATIC_BRANCH(LABEL, KEY) \
1098: nop; \
.pushsection __jump_table, "aw"; \
- .long 1098b - ., LABEL - .; \
- FTR_ENTRY_LONG KEY; \
+ FTR_ENTRY_LONG 1098b, LABEL, KEY; \
.popsection
#endif
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index ce87dc5ea23c..144858027fa3 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -11,10 +11,10 @@
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry);
+ struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code;
if (type == JUMP_LABEL_JMP)
- patch_branch(addr, jump_entry_target(entry), 0);
+ patch_branch(addr, entry->target, 0);
else
patch_instruction(addr, ppc_inst(PPC_INST_NOP));
}
Michael Ellerman
2021-05-28 23:39:49 UTC
Permalink
Post by Roman Bolshakov
This reverts commit b0b3b2c78ec075cec4721986a95abbbac8c3da4f.
Otherwise, direct kernel boot with initramfs no longer works in QEMU.
It's broken in some bizarre way because a valid initramfs is not
Found initrd at 0xc000000001f70000:0xc000000003d61d64
rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd
The issue is observed on v5.13-rc3 if the kernel is built with
defconfig, GCC 7.5.0 and GNU ld 2.32.0.
Are you able to try a different compiler?

I test booting qemu constantly, but I don't use GCC 7.5.

I assume your initramfs is compressed with XZ? How large is it
compressed?

And what qemu version are you using?

cheers
Roman Bolshakov
2021-05-30 14:07:23 UTC
Permalink
Post by Michael Ellerman
Post by Roman Bolshakov
This reverts commit b0b3b2c78ec075cec4721986a95abbbac8c3da4f.
Otherwise, direct kernel boot with initramfs no longer works in QEMU.
It's broken in some bizarre way because a valid initramfs is not
Found initrd at 0xc000000001f70000:0xc000000003d61d64
rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd
The issue is observed on v5.13-rc3 if the kernel is built with
defconfig, GCC 7.5.0 and GNU ld 2.32.0.
Are you able to try a different compiler?
Hi Michael,

I've just tried GCC 9.3.1 and the result is the same.

The offending patch has inline assembly, which typically goes through
binutils/GAS, so it might also be a case where older binutils doesn't
implement something properly (I've seen this on x86 and arm).
Post by Michael Ellerman
I test booting qemu constantly, but I don't use GCC 7.5.
And what qemu version are you using?
QEMU 3.1.1, but I've also tried 6.0.50 (QEMU master, 62c0ac5041e913) and
it fails the same way.
Post by Michael Ellerman
I assume your initramfs is compressed with XZ? How large is it
compressed?
Yes, XZ. initramfs size is 30 MB (around 100 MB cpio size).

It's interesting that the issue doesn't happen if I pass the initramfs from
the host (11MB); in that case the initramfs is recognized. So it might be
related to initramfs size: bigger initramfs images that used to work no
longer work with v5.13-rc3.

So, I've created a small initramfs using only static busybox (2.7M
uncompressed, 960K compressed with xz). No error is produced and it
boots fine.

If I add a dummy file (11M off /dev/urandom) to the small busybox
initramfs, it boots and the init is started but I'm seeing the error:

rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd

sha1sum of the file inside initramfs doesn't match sha1sum on the host.

guest # sha1sum dummy
407c347e671ddd00f69df12b3368048bad0ebf0c dummy
# QEMU: Terminated
host $ sha1sum dummy
ed8494b3eecab804960ceba2c497270eed0b0cd1 dummy

sha1sum is the same in the guest and on the host for a 10M dummy file:

guest # sha1sum dummy
43855f7a772a28cce91da9eb8f86f53bc807631f dummy
# QEMU: Terminated
host $ sha1sum dummy
43855f7a772a28cce91da9eb8f86f53bc807631f dummy

That might explain why bigger initramfs (or initramfs with bigger files)
doesn't boot - because some files might appear corrupted inside the guest.

Here're the sources of the initrd along with 11M dummy file:
https://drive.yadro.com/s/W8HdbPnaKmPPwK4

I've compressed it with:
$ find . 2>/dev/null | cpio -ocR 0:0 | xz --check=crc32 > ../initrd-dummy.xz

Hope this helps,
Roman
Michael Ellerman
2021-06-01 07:36:15 UTC
Permalink
Post by Roman Bolshakov
Post by Michael Ellerman
Post by Roman Bolshakov
This reverts commit b0b3b2c78ec075cec4721986a95abbbac8c3da4f.
Otherwise, direct kernel boot with initramfs no longer works in QEMU.
It's broken in some bizarre way because a valid initramfs is not
Found initrd at 0xc000000001f70000:0xc000000003d61d64
rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd
The issue is observed on v5.13-rc3 if the kernel is built with
defconfig, GCC 7.5.0 and GNU ld 2.32.0.
Are you able to try a different compiler?
Hi Michael,
I've just tried GCC 9.3.1 and the result is the same.
The offending patch has assembly inlines, they typically go through
binutils/GAS and it might also be a case when older binutils doesn't
implement something properly (i've seen this on x86 and arm).
Jump labels use asm goto, which is a compiler feature, but you're right
that the binutils version could also be important.

What ld versions have you tried?

And are those the toolchains from kernel.org or somewhere else?
Post by Roman Bolshakov
Post by Michael Ellerman
I test booting qemu constantly, but I don't use GCC 7.5.
And what qemu version are you using?
QEMU 3.1.1, but I've also tried 6.0.50 (QEMU master, 62c0ac5041e913) and
it fails the same way.
OK.
Post by Roman Bolshakov
Post by Michael Ellerman
I assume your initramfs is compressed with XZ? How large is it
compressed?
Yes, XZ. initramfs size is 30 MB (around 100 MB cpio size).
It's interesting that the issue doesn't happen if I pass initramfs from
host (11MB), then the initramfs can be recognized. It might be related
to initramfs size then and bigger initramfs that used to work no longer
work with v5.13-rc3.
Are you using qemu's -initrd option to pass the initramfs, or are you
building the initramfs into the kernel?
Post by Roman Bolshakov
So, I've created a small initramfs using only static busybox (2.7M
uncompressed, 960K compressed with xz). No error is produced and it
boots fine.
If I add a dummy file (11M off /dev/urandom) to the small busybox
rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd
sha1sum of the file inside initramfs doesn't match sha1sum on the host.
guest # sha1sum dummy
407c347e671ddd00f69df12b3368048bad0ebf0c dummy
# QEMU: Terminated
host $ sha1sum dummy
ed8494b3eecab804960ceba2c497270eed0b0cd1 dummy
guest # sha1sum dummy
43855f7a772a28cce91da9eb8f86f53bc807631f dummy
# QEMU: Terminated
host $ sha1sum dummy
43855f7a772a28cce91da9eb8f86f53bc807631f dummy
That might explain why bigger initramfs (or initramfs with bigger files)
doesn't boot - because some files might appear corrupted inside the guest.
https://drive.yadro.com/s/W8HdbPnaKmPPwK4
$ find . 2>/dev/null | cpio -ocR 0:0 | xz --check=crc32 > ../initrd-dummy.xz
Hope this helps,
I haven't been able to reproduce any corruption, with various initramfs
sizes.

Can you send us your kernel .config & qemu command line.

And can you try the patch below?

cheers


diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index ce87dc5ea23c..3d9878124cde 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -13,6 +13,9 @@ void arch_jump_label_transform(struct jump_entry *entry,
{
struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry);

+ if (!is_kernel_text((unsigned long)addr) && !is_kernel_inittext((unsigned long)addr))
+ printk("%s: addr %px %pS is not kernel text?\n", __func__, addr, addr);
+
if (type == JUMP_LABEL_JMP)
patch_branch(addr, jump_entry_target(entry), 0);
else
Greg Kurz
2021-06-07 17:03:43 UTC
Permalink
On Tue, 01 Jun 2021 17:36:15 +1000
Post by Michael Ellerman
Post by Roman Bolshakov
Post by Michael Ellerman
Post by Roman Bolshakov
This reverts commit b0b3b2c78ec075cec4721986a95abbbac8c3da4f.
Otherwise, direct kernel boot with initramfs no longer works in QEMU.
It's broken in some bizarre way because a valid initramfs is not
Found initrd at 0xc000000001f70000:0xc000000003d61d64
rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd
The issue is observed on v5.13-rc3 if the kernel is built with
defconfig, GCC 7.5.0 and GNU ld 2.32.0.
Are you able to try a different compiler?
Hi Michael,
I've just tried GCC 9.3.1 and the result is the same.
The offending patch has assembly inlines, they typically go through
binutils/GAS and it might also be a case when older binutils doesn't
implement something properly (i've seen this on x86 and arm).
Jump labels use asm goto, which is a compiler feature, but you're right
that the binutils version could also be important.
What ld versions have you tried?
And are those the toolchains from kernel.org or somewhere else?
Post by Roman Bolshakov
Post by Michael Ellerman
I test booting qemu constantly, but I don't use GCC 7.5.
And what qemu version are you using?
QEMU 3.1.1, but I've also tried 6.0.50 (QEMU master, 62c0ac5041e913) and
it fails the same way.
OK.
Post by Roman Bolshakov
Post by Michael Ellerman
I assume your initramfs is compressed with XZ? How large is it
compressed?
Yes, XZ. initramfs size is 30 MB (around 100 MB cpio size).
It's interesting that the issue doesn't happen if I pass initramfs from
host (11MB), then the initramfs can be recognized. It might be related
to initramfs size then and bigger initramfs that used to work no longer
work with v5.13-rc3.
Are you using qemu's -initrd option to pass the initramfs, or are you
building the initramfs into the kernel?
Hi Michael,

I'm hitting the same issue while trying to boot a RHEL9 guest with
the distro's default kernel/initramfs and grub.

Interestingly, this doesn't happen with older QEMU, e.g. 4.2.0 that
is shipped with RHEL8. I've bisected it to this commit from the
QEMU 5.0 era:


commit 8897ea5a9fc0aafa5ed7eee1e0c49893b91a2d87
Author: David Gibson <***@gibson.dropbear.id.au>
Date: Thu Nov 28 16:37:04 2019 +1100

spapr: Don't attempt to clamp RMA to VRMA constraint


This mostly changes how memory is presented in the FDT.

Before 8897ea5a9fc, for a VM with 1 gig of RAM, we had several nodes,
first one being the VRMA (limited to 256 megs).

***@20000000 {
ibm,associativity = <0x04 0x00 0x00 0x00 0x00>;
reg = <0x00 0x20000000 0x00 0x20000000>;
device_type = "memory";
};

***@10000000 {
ibm,associativity = <0x04 0x00 0x00 0x00 0x00>;
reg = <0x00 0x10000000 0x00 0x10000000>;
device_type = "memory";
};

***@0 {
ibm,associativity = <0x04 0x00 0x00 0x00 0x00>;
reg = <0x00 0x00 0x00 0x10000000>;
device_type = "memory";
};


Now we have a single node for all RAM:

***@0 {
ibm,associativity = <0x04 0x00 0x00 0x00 0x00>;
reg = <0x00 0x00 0x00 0x40000000>;
device_type = "memory";
};

If I set an arbitrary constraint again on the VRMA, I get the
multiple memory nodes back and, depending on the value, the
boot succeeds. In my 1 gig RHEL9 guest case, I need to set
a VRMA size <= 0x32000000.

Not sure how this can relate to the initramfs though. I just see
that grub doesn't map it at the same place:

0x0000000003100000 when boot fails

0x000000000f000000 when boot succeeds

In case this rings a bell...
Post by Michael Ellerman
Post by Roman Bolshakov
So, I've created a small initramfs using only static busybox (2.7M
uncompressed, 960K compressed with xz). No error is produced and it
boots fine.
If I add a dummy file (11M off /dev/urandom) to the small busybox
rootfs image is not initramfs (XZ-compressed data is corrupt); looks like an initrd
sha1sum of the file inside initramfs doesn't match sha1sum on the host.
guest # sha1sum dummy
407c347e671ddd00f69df12b3368048bad0ebf0c dummy
# QEMU: Terminated
host $ sha1sum dummy
ed8494b3eecab804960ceba2c497270eed0b0cd1 dummy
guest # sha1sum dummy
43855f7a772a28cce91da9eb8f86f53bc807631f dummy
# QEMU: Terminated
host $ sha1sum dummy
43855f7a772a28cce91da9eb8f86f53bc807631f dummy
That might explain why bigger initramfs (or initramfs with bigger files)
doesn't boot - because some files might appear corrupted inside the guest.
https://drive.yadro.com/s/W8HdbPnaKmPPwK4
$ find . 2>/dev/null | cpio -ocR 0:0 | xz --check=crc32 > ../initrd-dummy.xz
Hope this helps,
I haven't been able to reproduce any corruption, with various initramfs
sizes.
Can you send us your kernel .config & qemu command line.
And can you try the patch below?
cheers
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index ce87dc5ea23c..3d9878124cde 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -13,6 +13,9 @@ void arch_jump_label_transform(struct jump_entry *entry,
{
struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry);
+ if (!is_kernel_text((unsigned long)addr) && !is_kernel_inittext((unsigned long)addr))
+ printk("%s: addr %px %pS is not kernel text?\n", __func__, addr, addr);
+
I've applied this too. It doesn't produce any output in the crashing case.
On the contrary I get tons of them when I run with the hacked VRMA size,
but they show up much later, after we've already freed the initrd memory.

Cheers,

--
Greg
Post by Michael Ellerman
if (type == JUMP_LABEL_JMP)
patch_branch(addr, jump_entry_target(entry), 0);
else
Loading...