I ran tests on my E5-1620 v3 CPU using VS2013 and found that the standard C++11 function hypot takes roughly 1.5 times as long to finish as a hand-written function or a #define macro (see the figures below). In a second round of tests I also gave the compiler an opportunity to parallelize the calculation.
As a result I obtained:
C++11 hypot: 55 MOPs (mega operations per second)
_hypo function: 81-82 MOPs
#define func. : 81-83 MOPs
Parallelized 10 operations each:
C++11 hypot: 152 MOPs
_hypo function: 268 MOPs
#define func. : 240 MOPs
At least the results of the three functions were exactly the same.
Is this a problem with VS, or could it also happen on a Linux + gcc system?
Could it also happen on an AMD Ryzen?
UPDATED:
I also tested on Linux with an old, slow AMD processor.
The parallelized versions gave the following results:
C++11 hypot: doubled the speed
_hypo function: 30% speed increase
#define function: 3x speed (double that of _hypo)
What I have tried:
This new version uses <chrono> and also works on Linux and AMD systems:
#include <iostream>
#include <math.h>
#include <chrono>
#ifdef __linux
#include <unistd.h>
#else
#pragma warning(disable:4996) //disable deprecateds
#endif
using namespace std;
using uchar = unsigned char;

// Timestamps shared by successive timer() calls: start_time marks the beginning of the
// interval being measured, stop_time the end of the most recently measured one.
// steady_clock is used because it is monotonic; system_clock follows the wall clock and
// may be adjusted while a measurement is running.
auto start_time = chrono::steady_clock::now(), stop_time = start_time;
char null_char = '\0';  // empty string used as the default title of timer()

/**
 * Stopwatch helper: measures the time elapsed since the previous call (or since program
 * start-up for the first call), optionally prints it, and restarts the measurement.
 *
 * @param title      Label printed in front of the timing. When it is empty (the default)
 *                   or null nothing is printed and the call only restarts the stopwatch.
 * @param data_size  Number of operations performed during the interval; it is divided by
 *                   the elapsed microseconds to report mega-operations per second (MOPs).
 */
void timer(const char *title = &null_char, int data_size = 1)
{
    stop_time = chrono::steady_clock::now();
    // Fractional microseconds: avoids truncating short intervals to a whole count.
    const double us = chrono::duration<double, std::micro>(stop_time - start_time).count();
    if (title && *title)
        printf("%s time = %7lgms = %7lg MOPs\n", title, us * 1e-3, static_cast<double>(data_size) / us);
    // Restart only after printing so the printf cost is not charged to the next interval.
    start_time = chrono::steady_clock::now();
}
// Naive hypotenuse: square root of the sum of the squares of both arguments.
// The squares are formed directly, so unlike the library hypot there is no
// protection against overflow or underflow of the intermediate products.
double _hypo(double x,double y)
{
    const double x_squared = x * x;
    const double y_squared = y * y;
    return sqrt(x_squared + y_squared);
}
// Macro form of the naive hypotenuse. Each argument appears twice in the
// expansion, so an argument with side effects is evaluated twice.
#define _HYPO(a,b) (sqrt((a)*(a)+(b)*(b)))
int main()
{
int N=10000000;double x,y=12.22;
timer();
for (int i=0;i<N;i++)
{
x=(double) i+1;
y=hypot(x,y);y=hypot(x,y);
y=hypot(x,y);y=hypot(x,y);
y=hypot(x,y);y=hypot(x,y);
y=hypot(x,y);y=hypot(x,y);
y=hypot(x,y);y=hypot(x,y);
}
timer("hypot ",10*N);
for (int i=0;i<N;i++)
{
x=(double) i+1;
y=_hypo(x,y);y=_hypo(x,y);
y=_hypo(x,y);y=_hypo(x,y);
y=_hypo(x,y);y=_hypo(x,y);
y=_hypo(x,y);y=_hypo(x,y);
y=_hypo(x,y);y=_hypo(x,y);
}
timer("_hypo ",10*N);
for (int i=0;i<N;i++)
{
x=(double) i+1;
y=_HYPO(x,y);y=_HYPO(x,y);
y=_HYPO(x,y);y=_HYPO(x,y);
y=_HYPO(x,y);y=_HYPO(x,y);
y=_HYPO(x,y);y=_HYPO(x,y);
y=_HYPO(x,y);y=_HYPO(x,y);
}
timer("#define ",10*N);
for (int i=0;i<N;i++)
{
x=(double) i+1;
y=hypot(x,y)+hypot(0.5*x,y)+hypot(0.4*x,y)+hypot(0.3*x,y)+hypot(0.2*x,y)+
hypot(x,y)+hypot(0.5*x,y)+hypot(0.4*x,y)+hypot(0.3*x,y)+hypot(0.2*x,y);
}
timer("hypot par ",10*N);
for (int i=0;i<N;i++)
{
x=(double) i+1;
y=_hypo(x,y) +_hypo(0.5 *x,y)+_hypo(0.4 *x,y)+_hypo(0.3 *x,y)+_hypo(0.2 *x,y)+
_hypo(x*0.11,y)+_hypo(0.13*x,y)+_hypo(0.41*x,y)+_hypo(0.31*x,y)+_hypo(0.23*x,y);
}
timer("_hypo par ",10*N);
for (int i=0;i<N;i++)
{
x=(double) i+1;
y=_HYPO(x,y) +_HYPO(0.5 *x,y)+_HYPO(0.4 *x,y)+_HYPO(0.3 *x,y)+_HYPO(0.2 *x,y)+
_HYPO(x*0.11,y)+_HYPO(0.13*x,y)+_HYPO(0.41*x,y)+_HYPO(0.31*x,y)+_HYPO(0.23*x,y);
}
timer("_HYPO par ",10*N);
x=1.12345;y=y+=.77777732;
if ((hypot(x,y)!=_hypo(x,y))||(hypot(x,y)!=_HYPO(x,y)))
cout<<"ERROR: "<<hypot(x,y)<<"!="<<_hypo(x,y) <<" or "<<hypot(x,y)<<"!="<<_HYPO(x,y)<<endl;
else cout<<"hypot(x,y)==_hypo(x,y)==HYPO(x,y)"<<endl;
cout<<"===END==="<<endl;getchar();
}