#include <asm/io.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>
+#include <asm/i387.h>
#define PFX KBUILD_MODNAME ": "
* Another possible performance boost may come from simply buffering
* until we have 4 bytes, thus returning a u32 at a time,
* instead of the current u8-at-a-time.
+ *
+ * Padlock instructions can generate a spurious DNA fault, so
+ * we have to call them in the context of irq_ts_save/restore()
*/
static inline u32 xstore(u32 *addr, u32 edx_in)
{
u32 eax_out;
+ int ts_state;
+
+ ts_state = irq_ts_save();
asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
:"=m"(*addr), "=a"(eax_out)
:"D"(addr), "d"(edx_in));
+ irq_ts_restore(ts_state);
return eax_out;
}
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <asm/byteorder.h>
+#include <asm/i387.h>
#include "padlock.h"
/* Control word. */
asm volatile ("pushfl; popfl");
}
+/*
+ * While the padlock instructions don't use FP/SSE registers, they
+ * generate a spurious DNA fault when cr0.ts is '1'. These instructions
+ * should be used only inside the irq_ts_save/restore() context
+ */
+
static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key,
void *control_word)
{
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct aes_ctx *ctx = aes_ctx(tfm);
+ int ts_state;
padlock_reset_key();
+
+ ts_state = irq_ts_save();
aes_crypt(in, out, ctx->E, &ctx->cword.encrypt);
+ irq_ts_restore(ts_state);
}
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct aes_ctx *ctx = aes_ctx(tfm);
+ int ts_state;
padlock_reset_key();
+
+ ts_state = irq_ts_save();
aes_crypt(in, out, ctx->D, &ctx->cword.decrypt);
+ irq_ts_restore(ts_state);
}
static struct crypto_alg aes_alg = {
struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
+ int ts_state;
padlock_reset_key();
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
+ ts_state = irq_ts_save();
while ((nbytes = walk.nbytes)) {
padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
ctx->E, &ctx->cword.encrypt,
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
+ irq_ts_restore(ts_state);
return err;
}
struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
+ int ts_state;
padlock_reset_key();
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
+ ts_state = irq_ts_save();
while ((nbytes = walk.nbytes)) {
padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
ctx->D, &ctx->cword.decrypt,
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
-
+ irq_ts_restore(ts_state);
return err;
}
struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
+ int ts_state;
padlock_reset_key();
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
+ ts_state = irq_ts_save();
while ((nbytes = walk.nbytes)) {
u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
walk.dst.virt.addr, ctx->E,
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
+ irq_ts_restore(ts_state);
return err;
}
struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
+ int ts_state;
padlock_reset_key();
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
+ ts_state = irq_ts_save();
while ((nbytes = walk.nbytes)) {
padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
ctx->D, walk.iv, &ctx->cword.decrypt,
err = blkcipher_walk_done(desc, &walk, nbytes);
}
+ irq_ts_restore(ts_state);
return err;
}
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
+#include <asm/i387.h>
#include "padlock.h"
#define SHA1_DEFAULT_FALLBACK "sha1-generic"
* PadLock microcode needs it that big. */
char buf[128+16];
char *result = NEAREST_ALIGNED(buf);
+ int ts_state;
((uint32_t *)result)[0] = SHA1_H0;
((uint32_t *)result)[1] = SHA1_H1;
((uint32_t *)result)[3] = SHA1_H3;
((uint32_t *)result)[4] = SHA1_H4;
+ /* prevent taking the spurious DNA fault with padlock. */
+ ts_state = irq_ts_save();
asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
: "+S"(in), "+D"(result)
: "c"(count), "a"(0));
+ irq_ts_restore(ts_state);
padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
}
* PadLock microcode needs it that big. */
char buf[128+16];
char *result = NEAREST_ALIGNED(buf);
+ int ts_state;
((uint32_t *)result)[0] = SHA256_H0;
((uint32_t *)result)[1] = SHA256_H1;
((uint32_t *)result)[6] = SHA256_H6;
((uint32_t *)result)[7] = SHA256_H7;
+ /* prevent taking the spurious DNA fault with padlock. */
+ ts_state = irq_ts_save();
asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
: "+S"(in), "+D"(result)
: "c"(count), "a"(0));
+ irq_ts_restore(ts_state);
padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
}
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/regset.h>
+#include <linux/hardirq.h>
#include <asm/asm.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
preempt_enable();
}
+/*
+ * Some instructions like VIA's padlock instructions generate a spurious
+ * DNA fault but don't modify SSE registers. And these instructions
+ * get used from interrupt context aswell. To prevent these kernel instructions
+ * in interrupt context interact wrongly with other user/kernel fpu usage, we
+ * should use them only in the context of irq_ts_save/restore()
+ */
+static inline int irq_ts_save(void)
+{
+ /*
+ * If we are in process context, we are ok to take a spurious DNA fault.
+ * Otherwise, doing clts() in process context require pre-emption to
+ * be disabled or some heavy lifting like kernel_fpu_begin()
+ */
+ if (!in_interrupt())
+ return 0;
+
+ if (read_cr0() & X86_CR0_TS) {
+ clts();
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline void irq_ts_restore(int TS_state)
+{
+ if (TS_state)
+ stts();
+}
+
#ifdef CONFIG_X86_64
static inline void save_init_fpu(struct task_struct *tsk)