0

Hello support team,

I’m confused about another case where the code generated by GNURX for RXv3 differs depending on whether -mdfpu is specified, as shown below. Is this intended?

SOURCE:

#include <math.h>

struct vec { float X; float Y; };

void rot(float t, struct vec *v1, struct vec *v2)
{
v2->X = -sinf(t) * v1->Y + cosf(t) * v1->X;
v2->Y = sinf(t) * v1->X + cosf(t) * v1->Y;
}

CASE A: without -mdfpu

rx-elf-gcc -std=gnu99 -O3 -misa=v3 -mtfu=intrinsic,mathlib -Wa,-adln=rxv3_tfu_sinf_cosf.lst -c rxv3_tfu_sinf_cosf.c

1 .file "rxv3_tfu_sinf_cosf.c"
2 .section P,"ax"
3 .global _rot
5 _rot:
6 0000 7E A7 push.l r7
7 0002 FB FE 14 14 08 mov.L #0x81414, r15
8 0007 E3 F1 mov.L r1, [r15]
9 0009 FB EE 10 14 08 mov.L #0x81410, r14
10 000e EC F4 mov.L [r15], r4
11 0010 A8 2F mov.L 4[r2], r7
12 0012 E7 E1 01 mov.L r1, 4[r14]
13 0015 EC E5 mov.L [r14], r5
14 0017 70 44 00 00 00 80 add #-2147483648, r4 <-- HERE
15 001d FC 8F 74 fmul r7, r4
16 0020 EC 27 mov.L [r2], r7
17 0022 FC 8F 75 fmul r7, r5
18 0025 FC 8B 45 fadd r4, r5
19 0028 E3 35 mov.L r5, [r3]
20 002a E3 F1 mov.L r1, [r15]
21 002c EC F4 mov.L [r15], r4
22 002e EC 2F mov.L [r2], r15
23 0030 E7 E1 01 mov.L r1, 4[r14]
24 0033 EC E5 mov.L [r14], r5
25 0035 A8 2A mov.L 4[r2], r2
26 0037 FC 8F F4 fmul r15, r4
27 003a FC 8F 25 fmul r2, r5
28 003d FC 8B 45 fadd r4, r5
29 0040 A0 3D mov.L r5, 4[r3]
30 0042 3F 77 01 rtsd #4, r7-r7
32 .ident "GCC: (GCC_Build_20220528) 8.3.0.202202-GNURX 20190222"

CASE B: with -mdfpu

rx-elf-gcc -std=gnu99 -O3 -misa=v3 -mdfpu -mtfu=intrinsic,mathlib -Wa,-adln=rxv3_tfu_sinf_cosf.lst -c rxv3_tfu_sinf_cosf.c

1 .file "rxv3_tfu_sinf_cosf.c"
2 .section P,"ax"
3 .global _rot
5 _rot:
6 0000 7E A7 push.l r7
7 0002 75 B0 01 dpushm.d dr0-dr1
8 0005 FB EE 14 14 08 mov.L #0x81414, r14
9 000a E3 E1 mov.L r1, [r14]
10 000c EC E4 mov.L [r14], r4
11 000e FB FE 10 14 08 mov.L #0x81410, r15
12 0013 A8 2F mov.L 4[r2], r7
13 0015 E7 F1 01 mov.L r1, 4[r15]
14 0018 FD 77 84 0A ftod r4, dr0 <-- HERE
15 001c EC F4 mov.L [r15], r4
16 001e 76 90 0C 02 dneg dr0, dr0 <-- HERE
17 0022 76 90 0D 1C dtof dr0, dr1 <-- HERE
18 0026 FD 75 85 10 dmov.L drl1, r5 <-- HERE
19 002a FC 8F 75 fmul r7, r5
20 002d EC 27 mov.L [r2], r7
21 002f FC 8F 74 fmul r7, r4
22 0032 FC 8B 45 fadd r4, r5
23 0035 E3 35 mov.L r5, [r3]
24 0037 E3 E1 mov.L r1, [r14]
25 0039 EC E4 mov.L [r14], r4
26 003b EC 2E mov.L [r2], r14
27 003d E7 F1 01 mov.L r1, 4[r15]
28 0040 EC F5 mov.L [r15], r5
29 0042 A8 2A mov.L 4[r2], r2
30 0044 FC 8F E4 fmul r14, r4
31 0047 FC 8F 25 fmul r2, r5
32 004a FC 8B 45 fadd r4, r5
33 004d A0 3D mov.L r5, 4[r3]
34 004f 75 B8 01 dpopm.d dr0-dr1
35 0052 7E B7 pop r7
36 0054 02 rts
38 .ident "GCC: (GCC_Build_20220528) 8.3.0.202202-GNURX 20190222"

Best regards,
NoMaY

NoMaY-jp コメント済
    • Hello NoMaY-san,

      This seems to be a similar issue to your sqrtf question.

      We have raised an internal bug ticket and we hope to fix this in a future release.
      __
      Best regards,
      The Open Source Tools Team

    • Hello support team,

      Thank you for improving this in GCC 8.3.0.202204-GNURX. I now get the following code even in CASE B.

      CASE B: with -mdfpu

      rx-elf-gcc -std=gnu99 -O3 -misa=v3 -mdfpu -mtfu=intrinsic,mathlib -Wa,-adln=rxv3_tfu_sinf_cosf.lst -c rxv3_tfu_sinf_cosf.c

      1 .file "rxv3_tfu_sinf_cosf.c"
      2 .section P,"ax"
      3 .global _rot
      5 _rot:
      6 0000 7E A7 push.l r7
      7 0002 FB EE 14 14 08 mov.L #0x81414, r14
      8 0007 E3 E1 mov.L r1, [r14]
      9 0009 FB 4E 10 14 08 mov.L #0x81410, r4
      10 000e EC EF mov.L [r14], r15
      11 0010 A8 2F mov.L 4[r2], r7
      12 0012 A0 49 mov.L r1, 4[r4]
      13 0014 EC 45 mov.L [r4], r5
      14 0016 FC 8F 7F fmul r7, r15
      15 0019 EC 27 mov.L [r2], r7
      16 001b FC 8F 75 fmul r7, r5
      17 001e FC 83 F5 fsub r15, r5
      18 0021 E3 35 mov.L r5, [r3]
      19 0023 E3 E1 mov.L r1, [r14]
      20 0025 EC EE mov.L [r14], r14
      21 0027 EC 2F mov.L [r2], r15
      22 0029 A0 49 mov.L r1, 4[r4]
      23 002b EC 45 mov.L [r4], r5
      24 002d A8 2C mov.L 4[r2], r4
      25 002f FC 8F FE fmul r15, r14
      26 0032 FC 8F 45 fmul r4, r5
      27 0035 FC 8B E5 fadd r14, r5
      28 0038 A0 3D mov.L r5, 4[r3]
      29 003a 3F 77 01 rtsd #4, r7-r7
      31 .ident "GCC: (GCC_Build_20221126) 8.3.0.202204-GNURX 20190222"

      In the Release Notes of the GCC 8.3.0.202204-GNURX:

      2. [Improvement] Negated floats getting promoted to double

      In previous releases, negation of floats resulted in their promotion to double, generating sub-optimal
      code if doubles were represented on 8 bytes.
      In the current release of the compiler, promotion will no longer happen when negating floats.

      Best regards,
      NoMaY

      P.S.

      I notice that the code generated in the two cases is still slightly different. (It isn’t a problem.)

      CASE A: without -mdfpu

      rx-elf-gcc -std=gnu99 -O3 -misa=v3 -mtfu=intrinsic,mathlib -Wa,-adln=rxv3_tfu_sinf_cosf.lst -c rxv3_tfu_sinf_cosf.c

      1 .file “rxv3_tfu_sinf_cosf.c”
      2 .section P,”ax”
      3 .global _rot
      5 _rot:
      6 0000 7E A7 push.l r7
      7 0002 FB FE 14 14 08 mov.L #0x81414, r15
      8 0007 E3 F1 mov.L r1, [r15]
      9 0009 FB EE 10 14 08 mov.L #0x81410, r14
      10 000e EC F4 mov.L [r15], r4
      11 0010 A8 2F mov.L 4[r2], r7
      12 0012 E7 E1 01 mov.L r1, 4[r14]
      13 0015 EC E5 mov.L [r14], r5
      14 0017 70 44 00 00 00 80 add #-2147483648, r4 <-- HERE
      15 001d FC 8F 74 fmul r7, r4
      16 0020 EC 27 mov.L [r2], r7
      17 0022 FC 8F 75 fmul r7, r5
      18 0025 FC 8B 45 fadd r4, r5 <-- HERE
      19 0028 E3 35 mov.L r5, [r3]
      20 002a E3 F1 mov.L r1, [r15]
      21 002c EC F4 mov.L [r15], r4
      22 002e EC 2F mov.L [r2], r15
      23 0030 E7 E1 01 mov.L r1, 4[r14]
      24 0033 EC E5 mov.L [r14], r5
      25 0035 A8 2A mov.L 4[r2], r2
      26 0037 FC 8F F4 fmul r15, r4
      27 003a FC 8F 25 fmul r2, r5
      28 003d FC 8B 45 fadd r4, r5
      29 0040 A0 3D mov.L r5, 4[r3]
      30 0042 3F 77 01 rtsd #4, r7-r7
      32 .ident "GCC: (GCC_Build_20221126) 8.3.0.202204-GNURX 20190222"

      CASE B: with -mdfpu

      rx-elf-gcc -std=gnu99 -O3 -misa=v3 -mdfpu -mtfu=intrinsic,mathlib -Wa,-adln=rxv3_tfu_sinf_cosf.lst -c rxv3_tfu_sinf_cosf.c

      1 .file "rxv3_tfu_sinf_cosf.c"
      2 .section P,"ax"
      3 .global _rot
      5 _rot:
      6 0000 7E A7 push.l r7
      7 0002 FB EE 14 14 08 mov.L #0x81414, r14
      8 0007 E3 E1 mov.L r1, [r14]
      9 0009 FB 4E 10 14 08 mov.L #0x81410, r4
      10 000e EC EF mov.L [r14], r15
      11 0010 A8 2F mov.L 4[r2], r7
      12 0012 A0 49 mov.L r1, 4[r4]
      13 0014 EC 45 mov.L [r4], r5
      14 0016 FC 8F 7F fmul r7, r15
      15 0019 EC 27 mov.L [r2], r7
      16 001b FC 8F 75 fmul r7, r5
      17 001e FC 83 F5 fsub r15, r5 <-- HERE
      18 0021 E3 35 mov.L r5, [r3]
      19 0023 E3 E1 mov.L r1, [r14]
      20 0025 EC EE mov.L [r14], r14
      21 0027 EC 2F mov.L [r2], r15
      22 0029 A0 49 mov.L r1, 4[r4]
      23 002b EC 45 mov.L [r4], r5
      24 002d A8 2C mov.L 4[r2], r4
      25 002f FC 8F FE fmul r15, r14
      26 0032 FC 8F 45 fmul r4, r5
      27 0035 FC 8B E5 fadd r14, r5
      28 0038 A0 3D mov.L r5, 4[r3]
      29 003a 3F 77 01 rtsd #4, r7-r7
      31 .ident "GCC: (GCC_Build_20221126) 8.3.0.202204-GNURX 20190222"

      Best regards,
      NoMaY