• 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30

iPhone 5 Retina の解像度で画面一杯の画像を左に1ドットずらす事を考える

つまり 320 x 568 x 4(Retina は縦横 2 倍)x 4(1 要素 4 バイト)= 2908160 バイト分のコピー

ruby>>
for/memmove 機種 Debug Release (nano sec)
int32 for forward 5 7798625 3169792
int32 for reverse 5 6819542 2541750
float for forward 5 8200625 2440875
float for reverse 5 5783625 2477167
int32 mem forward 5 1105375 1394833
int32 mem reverse 5 894833 1158125
float mem forward 5 1085958 1128250
float mem reverse 5 896542 946042

int32 for forward 5s 4817875 943250
int32 for reverse 5s 4419667 976417
float for forward 5s 4793042 893958
float for reverse 5s 4415625 934625
int32 mem forward 5s 593083 628042
int32 mem reverse 5s 644542 582750
float mem forward 5s 565583 605958
float mem reverse 5s 561875 594792

<<--

ちなみに Accelerate Framework の vDSP_vsaddX で0を足すという強引な手でやると、以下のような結果。(オーバーラップしているときは使える保証がないので参考までに)
5s の Accelerate が何故かすごく速くなっていて、OpenGL を使ったときのような発熱がある。GPU ベースに書き直したのか?

ruby>>
int32 DSP reverse 5 1764667 2197000
int32 DSP reverse 5s 329292 393375
float DSP reverse 5 1083958 1291875
float DSP reverse 5s 325875 421125

<<--

ソース

ruby>>
// Benchmarks shifting a buffer of 320 * 568 * 4 four-byte elements by one
// element, comparing plain loops, memmove() and vDSP for integer and float data.
//
// Every scenario follows the same steps:
//   1. allocate wNum + 1 elements and fill element i with the value i,
//   2. start a Timer, perform only the shift, and log Timer::Nano() via NSLog,
//   3. verify the shifted contents with assert() and release the buffer.
//
// "Forward" scenarios copy w[ i + 1 ] into w[ i ]; "Reverse" scenarios copy
// w[ i ] into w[ i + 1 ].
//
// NOTE(review): the assert() checks are the only reads of the shifted data. If
// NDEBUG is defined they disappear — confirm the optimizer still keeps the
// shift being measured in such builds.
inline void
SpeedTest()
{
	// Number of elements shifted in every scenario (the buffer holds one more).
	const int32_t wNum = 320 * 568 * 4;

	// ---- Plain loops ----------------------------------------------------

	// int32_t, ascending index: w[ i ] <- w[ i + 1 ]
	{
		int32_t* w = new int32_t[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		for ( int32_t i = 0; i < wNum; i++ ) w[ i ] = w[ i + 1 ];
		NSLog( @"Int32 Forward:%zd", wTimer.Nano() );
		for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == i + 1 );
		delete[] w;
	}

	// int32_t, descending index: w[ i + 1 ] <- w[ i ]
	{
		int32_t* w = new int32_t[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		int32_t wCounter = wNum;
		// Walk from the end so each source element is read before it is overwritten.
		while ( wCounter-- ) w[ wCounter + 1 ] = w[ wCounter ];
		NSLog( @"Int32 Reverse:%zd", wTimer.Nano() );
		for ( int32_t i = 1; i <= wNum; i++ ) assert( w[ i ] == i - 1 );
		delete[] w;
	}

	// float, ascending index: w[ i ] <- w[ i + 1 ]
	{
		float* w = new float[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		for ( int32_t i = 0; i < wNum; i++ ) w[ i ] = w[ i + 1 ];
		NSLog( @"Float Forward:%zd", wTimer.Nano() );
		for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == static_cast< float >( i + 1 ) );
		delete[] w;
	}

	// float, descending index: w[ i + 1 ] <- w[ i ]
	{
		float* w = new float[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		int32_t wCounter = wNum;
		// Walk from the end so each source element is read before it is overwritten.
		while ( wCounter-- ) w[ wCounter + 1 ] = w[ wCounter ];
		NSLog( @"Float Reverse:%zd", wTimer.Nano() );
		for ( int32_t i = 1; i <= wNum; i++ ) assert( w[ i ] == static_cast< float >( i - 1 ) );
		delete[] w;
	}

	// ---- memmove --------------------------------------------------------
	// The byte count is derived from the element type instead of a literal 4.

	// int32_t, destination below source
	{
		int32_t* w = new int32_t[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		memmove( w, w + 1, wNum * sizeof( *w ) );
		NSLog( @"Int32 Memmove Forward:%zd", wTimer.Nano() );
		for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == i + 1 );
		delete[] w;
	}

	// int32_t, destination above source
	{
		int32_t* w = new int32_t[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		memmove( w + 1, w, wNum * sizeof( *w ) );
		NSLog( @"Int32 Memmove Reverse:%zd", wTimer.Nano() );
		for ( int32_t i = 1; i <= wNum; i++ ) assert( w[ i ] == i - 1 );
		delete[] w;
	}

	// float, destination below source
	{
		float* w = new float[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		memmove( w, w + 1, wNum * sizeof( *w ) );
		NSLog( @"Float Memmove Forward:%zd", wTimer.Nano() );
		for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == static_cast< float >( i + 1 ) );
		delete[] w;
	}

	// float, destination above source
	{
		float* w = new float[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		memmove( w + 1, w, wNum * sizeof( *w ) );
		NSLog( @"Float Memmove Reverse:%zd", wTimer.Nano() );
		for ( int32_t i = 1; i <= wNum; i++ ) assert( w[ i ] == static_cast< float >( i - 1 ) );
		delete[] w;
	}

	// ---- vDSP -----------------------------------------------------------
	// Adds a scalar zero to every element, which amounts to a copy.
	// NOTE(review): input (w + 1) and output (w) ranges overlap; verify that
	// vDSP permits this before relying on it.
	// NOTE(review): the log labels say "Rev", yet the data moves from w + 1 to
	// w, the same direction as the "Forward" scenarios — confirm the label.

	// int
	{
		int* w = new int[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		int wZero = 0;
		vDSP_vsaddi( w + 1, 1, &wZero, w, 1, wNum );
		NSLog( @"Int32 vDSP_vsaddi Rev:%zd", wTimer.Nano() );
		for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == i + 1 );
		delete[] w;
	}

	// float
	{
		float* w = new float[ wNum + 1 ];
		for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
		Timer wTimer;
		float wZero = 0;
		vDSP_vsadd( w + 1, 1, &wZero, w, 1, wNum );
		NSLog( @"Float vDSP_vsadd Rev:%zd", wTimer.Nano() );
		for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == static_cast< float >( i + 1 ) );
		delete[] w;
	}
}

<<--

posted by Saturn Saturn on Tue 8 Apr 2014 at 05:46 with 0 comments