Quantcast
Channel: GCC -msse2 does not generate SIMD code - Stack Overflow
Viewing all articles
Browse latest Browse all 2

GCC -msse2 does not generate SIMD code

$
0
0

I am trying to figure out why g++ does not generate SIMD code.

Info GCC / OS / CPU:

$ gcc -v
gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)
$ cat /proc/cpuinfo
...
model name  : Intel(R) Core(TM)2 Duo CPU     P8600  @ 2.40GHz
...

and here is my C++ code:

#include <iostream>#include <cstdlib>//function that fills an array with random numberstemplate<class T>void fillArray(T *array, int n){    srand(1);    for (int i = 0; i < n; i++) {        array[i] = (float) (rand() % 10);    }}// function that computes the dotprod of two vectors (loop unrolled)float dotCPP(float *src1, float *src2, int n){    float dest = 0;    for (int i = 0; i < n; i+=2) {        dest += (src1[i] * src2[i]) + (src1[i+1] * src2[i+1]);                    }    return dest;}int main(int argc, char *argv[]){    const int n = 1200000;               float *a = new float[n];   //allocate data on the heap    float something_else;      //store result    fillArray<float>(a,n);     //function that fills the array with random numbers    something_else = dotCPP(a, a, n);  //call function and store return value    return 0;}  

I compile the code with:

makefile:

CXX = g++
CXXFLGS = -g -Wall -std=c++11 -msse2 -O3
SRC = main.o dot.o
EXEC = dot

$(EXEC): $(SRC)
    $(CXX) $(CXXFLGS) $(SRC) -o $(EXEC)

main.o: dot.cpp
    $(CXX) $(CXXFLGS) -c dot.cpp -o main.o

and use gdb to inspect the generated code:

$ gdb dot
...
(gdb) b dotCPP
(gdb) r
...
(gdb) disass
Dump of assembler code for function dotCPP(float*, float*, int):
=> 0x08048950 <+0>:     push   %ebx
   0x08048951 <+1>:     mov    0x10(%esp),%ebx
   0x08048955 <+5>:     mov    0x8(%esp),%edx
   0x08048959 <+9>:     mov    0xc(%esp),%ecx
   0x0804895d <+13>:    test   %ebx,%ebx
   0x0804895f <+15>:    jle    0x8048983 <dotCPP(float*, float*, int)+51>
   0x08048961 <+17>:    xor    %eax,%eax
   0x08048963 <+19>:    fldz
   0x08048965 <+21>:    lea    0x0(%esi),%esi
   0x08048968 <+24>:    flds   (%edx,%eax,4)
   0x0804896b <+27>:    fmuls  (%ecx,%eax,4)
   0x0804896e <+30>:    flds   0x4(%edx,%eax,4)
   0x08048972 <+34>:    fmuls  0x4(%ecx,%eax,4)
   0x08048976 <+38>:    add    $0x2,%eax
   0x08048979 <+41>:    cmp    %eax,%ebx
   0x0804897b <+43>:    faddp  %st,%st(1)
   0x0804897d <+45>:    faddp  %st,%st(1)
   0x0804897f <+47>:    jg     0x8048968 <dotCPP(float*, float*, int)+24>
   0x08048981 <+49>:    pop    %ebx
   0x08048982 <+50>:    ret
   0x08048983 <+51>:    fldz
   0x08048985 <+53>:    pop    %ebx
   0x08048986 <+54>:    ret
End of assembler dump.

Now, am I missing something, or should gcc make use of the xmm registers?

I would really appreciate any suggestions that would help me understand why gcc does not generate code that uses the xmm registers.

Please let me know if you need further information on anything.


Viewing all articles
Browse latest Browse all 2

Latest Images

Trending Articles





Latest Images