[PATCH] x86: Replace CpuABM with CpuPOPCNT
H.J. Lu
hjl.tools@gmail.com
Mon Feb 17 15:27:00 GMT 2020
AMD ABM has 2 instructions: popcnt and lzcnt. The ABM CPUID feature bit has
been reused for lzcnt, and a separate POPCNT CPUID feature bit was added for
popcnt. This patch removes CpuABM and adds CpuPOPCNT. It changes ABM to enable
both lzcnt and popcnt, and changes SSE4.2 to also enable popcnt.
I will check it in shortly.
H.J.
---
gas/
* config/tc-i386.c (cpu_arch): Add .popcnt.
* doc/c-i386.texi: Remove abm and .abm. Add popcnt and .popcnt.
Add a tab before @samp{.sse4a}.
opcodes/
* i386-gen.c (cpu_flag_init): Replace CpuABM with
CpuLZCNT|CpuPOPCNT. Add CpuPOPCNT to CPU_SSE4_2_FLAGS. Add
CPU_POPCNT_FLAGS.
(cpu_flags): Remove CpuABM. Add CpuPOPCNT.
* i386-opc.h (CpuABM): Removed.
(CpuPOPCNT): New.
(i386_cpu_flags): Remove cpuabm. Add cpupopcnt.
* i386-opc.tbl: Replace CpuABM|CpuSSE4_2 with CpuPOPCNT on
popcnt. Remove CpuABM from lzcnt.
* i386-init.h: Regenerated.
* i386-tbl.h: Likewise.
---
gas/ChangeLog | 6 +
gas/config/tc-i386.c | 2 +
gas/doc/c-i386.texi | 11 +-
opcodes/ChangeLog | 14 +
opcodes/i386-gen.c | 16 +-
opcodes/i386-init.h | 240 +-
opcodes/i386-opc.h | 12 +-
opcodes/i386-opc.tbl | 8 +-
opcodes/i386-tbl.h | 5380 +++++++++++++++++++++---------------------
9 files changed, 2862 insertions(+), 2827 deletions(-)
diff --git a/gas/ChangeLog b/gas/ChangeLog
index b6ce3ef834..095e457822 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,9 @@
+2020-02-17 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/tc-i386.c (cpu_arch): Add .popcnt.
+ * doc/c-i386.texi: Remove abm and .abm. Add popcnt and .popcnt.
+ Add a tab before @samp{.sse4a}.
+
2020-02-17 Jan Beulich <jbeulich@suse.com>
* config/tc-i386.c (process_suffix): Don't try to guess a suffix
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index c4c94ca52f..f559ad4103 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1055,6 +1055,8 @@ static const arch_entry cpu_arch[] =
CPU_EPT_FLAGS, 0 },
{ STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
CPU_LZCNT_FLAGS, 0 },
+ { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
+ CPU_POPCNT_FLAGS, 0 },
{ STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
CPU_HLE_FLAGS, 0 },
{ STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index 91586cd999..8c8e8d0f18 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -238,6 +238,7 @@ accept various extension mnemonics. For example,
@code{movbe},
@code{ept},
@code{lzcnt},
+@code{popcnt},
@code{hle},
@code{rtm},
@code{invpcid},
@@ -260,8 +261,7 @@ accept various extension mnemonics. For example,
@code{3dnowa},
@code{sse4a},
@code{sse5},
-@code{svme},
-@code{abm} and
+@code{svme} and
@code{padlock}.
Note that rather than extending a basic instruction set, the extension
mnemonics starting with @code{no} revoke the respective functionality.
@@ -1430,13 +1430,14 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{bdver4} @tab @samp{znver1} @tab @samp{znver2} @tab @samp{btver1}
@item @samp{btver2} @tab @samp{generic32} @tab @samp{generic64}
@item @samp{.cmov} @tab @samp{.fxsr} @tab @samp{.mmx}
-@item @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3} @samp{.sse4a}
+@item @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3} @tab @samp{.sse4a}
@item @samp{.ssse3} @tab @samp{.sse4.1} @tab @samp{.sse4.2} @tab @samp{.sse4}
@item @samp{.avx} @tab @samp{.vmx} @tab @samp{.smx} @tab @samp{.ept}
@item @samp{.clflush} @tab @samp{.movbe} @tab @samp{.xsave} @tab @samp{.xsaveopt}
@item @samp{.aes} @tab @samp{.pclmul} @tab @samp{.fma} @tab @samp{.fsgsbase}
@item @samp{.rdrnd} @tab @samp{.f16c} @tab @samp{.avx2} @tab @samp{.bmi2}
-@item @samp{.lzcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc} @tab @samp{.hle}
+@item @samp{.lzcnt} @tab @samp{.popcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc}
+@item @samp{.hle}
@item @samp{.rtm} @tab @samp{.adx} @tab @samp{.rdseed} @tab @samp{.prfchw}
@item @samp{.smap} @tab @samp{.mpx} @tab @samp{.sha} @tab @samp{.prefetchwt1}
@item @samp{.clflushopt} @tab @samp{.xsavec} @tab @samp{.xsaves} @tab @samp{.se1}
@@ -1450,7 +1451,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd}
@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
-@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme} @tab @samp{.abm}
+@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
@item @samp{.lwp} @tab @samp{.fma4} @tab @samp{.xop} @tab @samp{.cx16}
@item @samp{.padlock} @tab @samp{.clzero} @tab @samp{.mwaitx} @tab @samp{.rdpru}
@item @samp{.mcommit}
diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog
index f8715a369e..0df7e5004d 100644
--- a/opcodes/ChangeLog
+++ b/opcodes/ChangeLog
@@ -1,3 +1,17 @@
+2020-02-17 H.J. Lu <hongjiu.lu@intel.com>
+
+ * i386-gen.c (cpu_flag_init): Replace CpuABM with
+ CpuLZCNT|CpuPOPCNT. Add CpuPOPCNT to CPU_SSE4_2_FLAGS. Add
+ CPU_POPCNT_FLAGS.
+ (cpu_flags): Remove CpuABM. Add CpuPOPCNT.
+ * i386-opc.h (CpuABM): Removed.
+ (CpuPOPCNT): New.
+ (i386_cpu_flags): Remove cpuabm. Add cpupopcnt.
+ * i386-opc.tbl: Replace CpuABM|CpuSSE4_2 with CpuPOPCNT on
+ popcnt. Remove CpuABM from lzcnt.
+ * i386-init.h: Regenerated.
+ * i386-tbl.h: Likewise.
+
2020-02-17 Jan Beulich <jbeulich@suse.com>
* i386-opc.tbl (vcvtsi2sd, vcvtsi2ss, vcvtusi2sd, vcvtusi2ss):
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 4d98d31b74..52e6b3e21a 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -90,9 +90,9 @@ static initializer cpu_flag_init[] =
{ "CPU_K8_FLAGS",
"CPU_ATHLON_FLAGS|CpuRdtscp|CPU_SSE2_FLAGS|CpuLM" },
{ "CPU_AMDFAM10_FLAGS",
- "CPU_K8_FLAGS|CpuFISTTP|CPU_SSE4A_FLAGS|CpuABM" },
+ "CPU_K8_FLAGS|CpuFISTTP|CPU_SSE4A_FLAGS|CpuLZCNT|CpuPOPCNT" },
{ "CPU_BDVER1_FLAGS",
- "CPU_GENERIC64_FLAGS|CpuFISTTP|CpuRdtscp|CpuCX16|CPU_XOP_FLAGS|CpuABM|CpuLWP|CpuSVME|CpuAES|CpuPCLMUL|CpuLZCNT|CpuPRFCHW" },
+ "CPU_GENERIC64_FLAGS|CpuFISTTP|CpuRdtscp|CpuCX16|CPU_XOP_FLAGS|CpuLZCNT|CpuPOPCNT|CpuLWP|CpuSVME|CpuAES|CpuPCLMUL|CpuPRFCHW" },
{ "CPU_BDVER2_FLAGS",
"CPU_BDVER1_FLAGS|CpuFMA|CpuBMI|CpuTBM|CpuF16C" },
{ "CPU_BDVER3_FLAGS",
@@ -100,11 +100,11 @@ static initializer cpu_flag_init[] =
{ "CPU_BDVER4_FLAGS",
"CPU_BDVER3_FLAGS|CpuAVX2|CpuMovbe|CpuBMI2|CpuRdRnd|CpuMWAITX" },
{ "CPU_ZNVER1_FLAGS",
- "CPU_GENERIC64_FLAGS|CpuFISTTP|CpuRdtscp|CpuCX16|CPU_AVX2_FLAGS|CpuSSE4A|CpuABM|CpuSVME|CpuAES|CpuPCLMUL|CpuLZCNT|CpuPRFCHW|CpuFMA|CpuBMI|CpuF16C|CpuXsaveopt|CpuFSGSBase|CpuMovbe|CpuBMI2|CpuRdRnd|CpuADX|CpuRdSeed|CpuSMAP|CpuSHA|CpuXSAVEC|CpuXSAVES|CpuClflushOpt|CpuCLZERO|CpuMWAITX" },
+ "CPU_GENERIC64_FLAGS|CpuFISTTP|CpuRdtscp|CpuCX16|CPU_AVX2_FLAGS|CpuSSE4A|CpuLZCNT|CpuPOPCNT|CpuSVME|CpuAES|CpuPCLMUL|CpuPRFCHW|CpuFMA|CpuBMI|CpuF16C|CpuXsaveopt|CpuFSGSBase|CpuMovbe|CpuBMI2|CpuRdRnd|CpuADX|CpuRdSeed|CpuSMAP|CpuSHA|CpuXSAVEC|CpuXSAVES|CpuClflushOpt|CpuCLZERO|CpuMWAITX" },
{ "CPU_ZNVER2_FLAGS",
"CPU_ZNVER1_FLAGS|CpuCLWB|CpuRDPID|CpuRDPRU|CpuMCOMMIT|CpuWBNOINVD" },
{ "CPU_BTVER1_FLAGS",
- "CPU_GENERIC64_FLAGS|CpuFISTTP|CpuCX16|CpuRdtscp|CPU_SSSE3_FLAGS|CpuSSE4A|CpuABM|CpuPRFCHW|CpuCX16|CpuClflush|CpuFISTTP|CpuSVME|CpuLZCNT" },
+ "CPU_GENERIC64_FLAGS|CpuFISTTP|CpuCX16|CpuRdtscp|CPU_SSSE3_FLAGS|CpuSSE4A|CpuLZCNT|CpuPOPCNT|CpuPRFCHW|CpuCX16|CpuClflush|CpuFISTTP|CpuSVME" },
{ "CPU_BTVER2_FLAGS",
"CPU_BTVER1_FLAGS|CPU_AVX_FLAGS|CpuBMI|CpuF16C|CpuAES|CpuPCLMUL|CpuMovbe|CpuXsaveopt|CpuPRFCHW" },
{ "CPU_8087_FLAGS",
@@ -138,7 +138,7 @@ static initializer cpu_flag_init[] =
{ "CPU_SSE4_1_FLAGS",
"CPU_SSSE3_FLAGS|CpuSSE4_1" },
{ "CPU_SSE4_2_FLAGS",
- "CPU_SSE4_1_FLAGS|CpuSSE4_2" },
+ "CPU_SSE4_1_FLAGS|CpuSSE4_2|CpuPOPCNT" },
{ "CPU_VMX_FLAGS",
"CpuVMX" },
{ "CPU_SMX_FLAGS",
@@ -181,6 +181,8 @@ static initializer cpu_flag_init[] =
"CpuBMI2" },
{ "CPU_LZCNT_FLAGS",
"CpuLZCNT" },
+ { "CPU_POPCNT_FLAGS",
+ "CpuPOPCNT" },
{ "CPU_HLE_FLAGS",
"CpuHLE" },
{ "CPU_RTM_FLAGS",
@@ -200,7 +202,7 @@ static initializer cpu_flag_init[] =
{ "CPU_SSE4A_FLAGS",
"CPU_SSE3_FLAGS|CpuSSE4a" },
{ "CPU_ABM_FLAGS",
- "CpuABM" },
+ "CpuLZCNT|CpuPOPCNT" },
{ "CPU_AVX_FLAGS",
"CPU_SSE4_2_FLAGS|CPU_XSAVE_FLAGS|CpuAVX" },
{ "CPU_AVX2_FLAGS",
@@ -536,7 +538,6 @@ static bitfield cpu_flags[] =
BITFIELD (CpuSVME),
BITFIELD (CpuVMX),
BITFIELD (CpuSMX),
- BITFIELD (CpuABM),
BITFIELD (CpuXsave),
BITFIELD (CpuXsaveopt),
BITFIELD (CpuAES),
@@ -557,6 +558,7 @@ static bitfield cpu_flags[] =
BITFIELD (CpuF16C),
BITFIELD (CpuBMI2),
BITFIELD (CpuLZCNT),
+ BITFIELD (CpuPOPCNT),
BITFIELD (CpuHLE),
BITFIELD (CpuRTM),
BITFIELD (CpuINVPCID),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index ccf5d91067..fc69d4d0fb 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -87,8 +87,10 @@ enum
CpuSSSE3,
/* SSE4a support required */
CpuSSE4a,
- /* ABM New Instructions required */
- CpuABM,
+ /* LZCNT support required */
+ CpuLZCNT,
+ /* POPCNT support required */
+ CpuPOPCNT,
/* SSE4.1 support required */
CpuSSE4_1,
/* SSE4.2 support required */
@@ -154,8 +156,6 @@ enum
CpuF16C,
/* Intel BMI2 support required */
CpuBMI2,
- /* LZCNT support required */
- CpuLZCNT,
/* HLE support required */
CpuHLE,
/* RTM support required */
@@ -298,7 +298,8 @@ typedef union i386_cpu_flags
unsigned int cpusmx:1;
unsigned int cpussse3:1;
unsigned int cpusse4a:1;
- unsigned int cpuabm:1;
+ unsigned int cpulzcnt:1;
+ unsigned int cpupopcnt:1;
unsigned int cpusse4_1:1;
unsigned int cpusse4_2:1;
unsigned int cpuavx:1;
@@ -331,7 +332,6 @@ typedef union i386_cpu_flags
unsigned int cpurdrnd:1;
unsigned int cpuf16c:1;
unsigned int cpubmi2:1;
- unsigned int cpulzcnt:1;
unsigned int cpuhle:1;
unsigned int cpurtm:1;
unsigned int cpuinvpcid:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index f5b31d1df9..13933c9e4c 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -2843,9 +2843,11 @@ extrq, 2, 0x660f79, None, 2, CpuSSE4a, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_
insertq, 2, 0xf20f79, None, 2, CpuSSE4a, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, RegXMM }
insertq, 4, 0xf20f78, None, 2, CpuSSE4a, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Imm8, RegXMM, RegXMM }
-// ABM instructions
-popcnt, 2, 0xf30fb8, None, 2, CpuABM|CpuSSE4_2, Modrm|CheckRegSize|No_bSuf|No_sSuf|No_ldSuf|NoAVX, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
-lzcnt, 2, 0xf30fbd, None, 2, CpuABM|CpuLZCNT, Modrm|CheckRegSize|No_bSuf|No_sSuf|No_ldSuf, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+// LZCNT instruction
+lzcnt, 2, 0xf30fbd, None, 2, CpuLZCNT, Modrm|CheckRegSize|No_bSuf|No_sSuf|No_ldSuf, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+
+// POPCNT instruction
+popcnt, 2, 0xf30fb8, None, 2, CpuPOPCNT, Modrm|CheckRegSize|No_bSuf|No_sSuf|No_ldSuf|NoAVX, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
// VIA PadLock extensions.
xstore-rng, 0, 0xfa7c0, None, 3, CpuPadLock, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|IsString|RepPrefixOk, { 0 }
More information about the Binutils
mailing list