I’m testing the values returned by the csrr instruction and I’m getting some strange results.
Here is the test program, adapted from the Hello World sample program.
#include <stdio.h>
/*
 * Baseline variant: reads minstret/mcycle with back-to-back csrr
 * instructions and prints the raw values and their differences.
 *
 * The nop pairs are padding that positions the csrr sequence relative to the
 * cache-line boundary. In this variant the boundary falls on a nop, so none
 * of the counter reads is the first instruction of a cache line.
 *
 * The register names and the single printf() call match the disassembly and
 * the one-line output shown for this program.
 *
 * NOTE: printf() is deliberately called with a format string only. The
 * values are placed by hand into a1..a6, the registers printf() reads its
 * variadic arguments from. This is undefined behavior in C and only works
 * while the compiler emits nothing that touches those registers between the
 * asm statements and the call (as is the case in the disassembly).
 */
int main()
{
    // Padding instructions
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;"); // First instruction of a new cache line
    __asm("nop; nop;");
    __asm("csrr t1, minstret"); // I1, ensure that I2 is single issued
    __asm("csrr t2, minstret"); // I2
    __asm("csrr t3, minstret"); // I3
    __asm("csrr t4, mcycle");
    __asm("csrr t5, mcycle"); // Ensure that I4 is single issued
    __asm("csrr t6, minstret"); // I4
    __asm("sub a1, t5, t4"); // Cycle = second mcycle read - first mcycle read
    __asm("sub a2, t6, t3"); // Instruction = I4 - I3
    __asm("mv a3, t1");
    __asm("mv a4, t2");
    __asm("mv a5, t3");
    __asm("mv a6, t6");
    printf("Cycle = %d, Instruction = %d, I1 = %d, I2 = %d, I3 = %d, I4 = %d\n");
    return 0;
}
When I run this program, I get the following output; the results are as expected.
Cycle = 1, Instruction = 3, I1 = 7990, I2 = 7992, I3 = 7993, I4 = 7996
The disassembled code of main is:
0000000020000a00 <main>:
20000a00: 41 11 addi sp, sp, -16
20000a02: 06 e4 sd ra, 8(sp)
20000a04: 22 e0 sd s0, 0(sp)
20000a06: 00 08 addi s0, sp, 16
20000a08: 01 00 nop
20000a0a: 01 00 nop
20000a40: 01 00 nop
20000a42: 01 00 nop
20000a44: 01 00 nop
20000a46: 01 00 nop
20000a48: 73 23 20 b0 csrr t1, minstret
20000a4c: f3 23 20 b0 csrr t2, minstret
20000a50: 73 2e 20 b0 csrr t3, minstret
20000a54: f3 2e 00 b0 csrr t4, mcycle
20000a58: 73 2f 00 b0 csrr t5, mcycle
20000a5c: f3 2f 20 b0 csrr t6, minstret
20000a60: b3 05 df 41 sub a1, t5, t4
20000a64: 33 86 cf 41 sub a2, t6, t3
20000a68: 9a 86 mv a3, t1
20000a6a: 1e 87 mv a4, t2
20000a6c: f2 87 mv a5, t3
20000a6e: 7e 88 mv a6, t6
20000a70: 17 05 00 00 auipc a0, 0
20000a74: 13 05 85 9a addi a0, a0, -1624
20000a78: ef 00 00 01 jal 0x20000a88 <printf>
20000a7c: 81 47 li a5, 0
20000a7e: 3e 85 mv a0, a5
20000a80: a2 60 ld ra, 8(sp)
20000a82: 02 64 ld s0, 0(sp)
20000a84: 41 01 addi sp, sp, 16
20000a86: 82 80 ret
But when I place csrr t3, minstret as the first instruction of a new cache line, the result of I3 is greater than I2 by two. I3 should be only one greater than I2.
#include <stdio.h>
/*
 * Variant in which I3 starts a cache line: reads minstret/mcycle with
 * back-to-back csrr instructions and prints the raw values and their
 * differences.
 *
 * The nop pairs are padding; their count is chosen so that
 * "csrr t3, minstret" (I3) is the first instruction of a new cache line
 * (address 0x20000a40 in the disassembly).
 *
 * NOTE: printf() is deliberately called with a format string only. The
 * values are placed by hand into a1..a6, the registers printf() reads its
 * variadic arguments from. This is undefined behavior in C and only works
 * while the compiler emits nothing that touches those registers between the
 * asm statements and the call (as is the case in the disassembly).
 */
int main()
{
    // Padding instructions
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("csrr t1, minstret"); // I1, ensure that I2 is single issued
    __asm("csrr t2, minstret"); // I2
    __asm("csrr t3, minstret"); // I3, first instruction of a new cache line
    __asm("csrr t4, mcycle");
    __asm("csrr t5, mcycle"); // Ensure that I4 is single issued
    __asm("csrr t6, minstret"); // I4
    __asm("sub a1, t5, t4"); // Cycle = second mcycle read - first mcycle read
    __asm("sub a2, t6, t3"); // Instruction = I4 - I3
    __asm("mv a3, t1");
    __asm("mv a4, t2");
    __asm("mv a5, t3");
    __asm("mv a6, t6");
    printf("Cycle = %d, Instruction = %d, I1 = %d, I2 = %d, I3 = %d, I4 = %d\n");
    return 0;
}
When running this program I get the following output:
Cycle = 1, Instruction = 2, I1 = 7982, I2 = 7984, I3 = 7986, I4 = 7988
The disassembled code of main is:
0000000020000a00 <main>:
20000a00: 41 11 addi sp, sp, -16
20000a02: 06 e4 sd ra, 8(sp)
20000a04: 22 e0 sd s0, 0(sp)
20000a06: 00 08 addi s0, sp, 16
20000a08: 01 00 nop
20000a0a: 01 00 nop
20000a34: 01 00 nop
20000a36: 01 00 nop
20000a38: 73 23 20 b0 csrr t1, minstret
20000a3c: f3 23 20 b0 csrr t2, minstret
20000a40: 73 2e 20 b0 csrr t3, minstret
20000a44: f3 2e 00 b0 csrr t4, mcycle
20000a48: 73 2f 00 b0 csrr t5, mcycle
20000a4c: f3 2f 20 b0 csrr t6, minstret
20000a50: b3 05 df 41 sub a1, t5, t4
20000a54: 33 86 cf 41 sub a2, t6, t3
20000a58: 9a 86 mv a3, t1
20000a5a: 1e 87 mv a4, t2
20000a5c: f2 87 mv a5, t3
20000a5e: 7e 88 mv a6, t6
20000a60: 17 05 00 00 auipc a0, 0
20000a64: 13 05 85 9b addi a0, a0, -1608
20000a68: ef 00 00 01 jal 0x20000a78 <printf>
20000a6c: 81 47 li a5, 0
20000a6e: 3e 85 mv a0, a5
20000a70: a2 60 ld ra, 8(sp)
20000a72: 02 64 ld s0, 0(sp)
20000a74: 41 01 addi sp, sp, 16
20000a76: 82 80 ret
And when I place csrr t6, minstret as the first instruction of a new cache line, the result of I4 is greater than I3 by four. I4 should be three greater than I3.
#include <stdio.h>
/*
 * Variant in which I4 starts a cache line: reads minstret/mcycle with
 * back-to-back csrr instructions and prints the raw values and their
 * differences.
 *
 * The nop pairs are padding; their count is chosen so that
 * "csrr t6, minstret" (I4) is the first instruction of a new cache line
 * (address 0x20000a40 in the disassembly).
 *
 * NOTE: printf() is deliberately called with a format string only. The
 * values are placed by hand into a1..a6, the registers printf() reads its
 * variadic arguments from. This is undefined behavior in C and only works
 * while the compiler emits nothing that touches those registers between the
 * asm statements and the call (as is the case in the disassembly).
 */
int main()
{
    // Padding instructions
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("nop; nop;");
    __asm("csrr t1, minstret"); // I1, ensure that I2 is single issued
    __asm("csrr t2, minstret"); // I2
    __asm("csrr t3, minstret"); // I3
    __asm("csrr t4, mcycle");
    __asm("csrr t5, mcycle"); // Ensure that I4 is single issued
    __asm("csrr t6, minstret"); // I4, first instruction of a new cache line
    __asm("sub a1, t5, t4"); // Cycle = second mcycle read - first mcycle read
    __asm("sub a2, t6, t3"); // Instruction = I4 - I3
    __asm("mv a3, t1");
    __asm("mv a4, t2");
    __asm("mv a5, t3");
    __asm("mv a6, t6");
    printf("Cycle = %d, Instruction = %d, I1 = %d, I2 = %d, I3 = %d, I4 = %d\n");
    return 0;
}
When running this program I get the following output:
Cycle = 1, Instruction = 4, I1 = 7976, I2 = 7978, I3 = 7979, I4 = 7983
The disassembled code of main is:
0000000020000a00 <main>:
20000a00: 41 11 addi sp, sp, -16
20000a02: 06 e4 sd ra, 8(sp)
20000a04: 22 e0 sd s0, 0(sp)
20000a06: 00 08 addi s0, sp, 16
20000a08: 01 00 nop
20000a0a: 01 00 nop
20000a28: 01 00 nop
20000a2a: 01 00 nop
20000a2c: 73 23 20 b0 csrr t1, minstret
20000a30: f3 23 20 b0 csrr t2, minstret
20000a34: 73 2e 20 b0 csrr t3, minstret
20000a38: f3 2e 00 b0 csrr t4, mcycle
20000a3c: 73 2f 00 b0 csrr t5, mcycle
20000a40: f3 2f 20 b0 csrr t6, minstret
20000a44: b3 05 df 41 sub a1, t5, t4
20000a48: 33 86 cf 41 sub a2, t6, t3
20000a4c: 9a 86 mv a3, t1
20000a4e: 1e 87 mv a4, t2
20000a50: f2 87 mv a5, t3
20000a52: 7e 88 mv a6, t6
20000a54: 17 05 00 00 auipc a0, 0
20000a58: 13 05 45 9c addi a0, a0, -1596
20000a5c: ef 00 00 01 jal 0x20000a6c <printf>
20000a60: 81 47 li a5, 0
20000a62: 3e 85 mv a0, a5
20000a64: a2 60 ld ra, 8(sp)
20000a66: 02 64 ld s0, 0(sp)
20000a68: 41 01 addi sp, sp, 16
20000a6a: 82 80 ret