iPhone 5 Retina の解像度で画面一杯の画像を左に1ドットずらす事を考える
つまり 320 × 568 × 4(Retina は縦横 2 倍なので要素数は 4 倍)× 4(int32_t / float は 1 要素 4 バイト)= 2,908,160 バイト分のコピーになる。
1 for/memmove 機種 Debug Release (nano sec)
2 int32 for forward 5 7798625 3169792
3 int32 for reverse 5 6819542 2541750
4 float for forward 5 8200625 2440875
5 float for reverse 5 5783625 2477167
6 int32 mem forward 5 1105375 1394833
7 int32 mem reverse 5 894833 1158125
8 float mem forward 5 1085958 1128250
9 float mem reverse 5 896542 946042
10
11
12 int32 for forward 5s 4817875 943250
13 int32 for reverse 5s 4419667 976417
14 float for forward 5s 4793042 893958
15 float for reverse 5s 4415625 934625
16 int32 mem forward 5s 593083 628042
17 int32 mem reverse 5s 644542 582750
18 float mem forward 5s 565583 605958
19 float mem reverse 5s 561875 594792
20
ちなみに Accelerate Framework の vDSP_vsaddX で0を足すという強引な手でやると、以下のような結果。(オーバーラップしているときは使える保証がないので参考までに)
5s の Accelerate が何故かすごく速くなっていて、OpenGL を使ったときのような発熱がある。GPU ベースに書き直したのか?
1 int32 DSP reverse 5 1764667 2197000
2 int32 DSP reverse 5s 329292 393375
3 float DSP reverse 5 1083958 1291875
4 float DSP reverse 5s 325875 421125
5
ソース
// Micro-benchmark: shifts a buffer of 320 * 568 * 4 elements by one slot using
// plain loops, memmove and vDSP, and logs the elapsed time of each variant.
//
// Every case follows the same pattern:
//   1. allocate wNum + 1 elements and fill them with w[ i ] = i,
//   2. start a Timer, perform ONLY the shift, log wTimer.Nano(),
//   3. assert that every shifted element holds the expected value,
//   4. release the buffer.
//
// "Forward" cases copy w[ i + 1 ] into w[ i ] (shift toward index 0);
// "Reverse" cases copy w[ i ] into w[ i + 1 ] (shift toward the end).
//
// The timed statements are intentionally left in their original form so that
// measurements stay comparable with previously recorded numbers.
//
// NOTE(review): Timer and Timer::Nano() are declared elsewhere; the "%zd"
// format assumes Nano() returns a signed, size_t-wide integer — confirm.
// NOTE(review): the verification relies on assert(), so it is skipped when
// NDEBUG is defined.
inline void
SpeedTest()
{
    // Element count shared by every case; each buffer has one spare slot so the
    // one-element shift never leaves the allocation.
    const int32_t wNum = 320 * 568 * 4;

    // Normal (hand-written loops)
    {
        int32_t* w = new int32_t[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
        Timer wTimer;
        for ( int32_t i = 0; i < wNum; i++ ) w[ i ] = w[ i + 1 ];
        NSLog( @"Int32 Forward:%zd", wTimer.Nano() );
        // Signed index: avoids comparing size_t against int32_t values.
        for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == i + 1 );
        delete[] w;
    }
    {
        int32_t* w = new int32_t[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
        Timer wTimer;
        // Walk from the top down so each source element is read before it is overwritten.
        int32_t wCounter = wNum;
        while ( wCounter-- ) w[ wCounter + 1 ] = w[ wCounter ];
        NSLog( @"Int32 Reverse:%zd", wTimer.Nano() );
        for ( int32_t i = 1; i <= wNum; i++ ) assert( w[ i ] == i - 1 );
        delete[] w;
    }
    {
        float* w = new float[ wNum + 1 ];
        // Values stay below 2^24, so every index is exactly representable as float.
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = static_cast< float >( i );
        Timer wTimer;
        for ( int32_t i = 0; i < wNum; i++ ) w[ i ] = w[ i + 1 ];
        NSLog( @"Float Forward:%zd", wTimer.Nano() );
        for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == static_cast< float >( i + 1 ) );
        delete[] w;
    }
    {
        float* w = new float[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = static_cast< float >( i );
        Timer wTimer;
        int32_t wCounter = wNum;
        while ( wCounter-- ) w[ wCounter + 1 ] = w[ wCounter ];
        NSLog( @"Float Reverse:%zd", wTimer.Nano() );
        for ( int32_t i = 1; i <= wNum; i++ ) assert( w[ i ] == static_cast< float >( i - 1 ) );
        delete[] w;
    }

    // Memmove (byte count derived from the element type instead of a literal 4)
    {
        int32_t* w = new int32_t[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
        Timer wTimer;
        memmove( w, w + 1, wNum * sizeof( *w ) );
        NSLog( @"Int32 Memmove Forward:%zd", wTimer.Nano() );
        for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == i + 1 );
        delete[] w;
    }
    {
        int32_t* w = new int32_t[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
        Timer wTimer;
        memmove( w + 1, w, wNum * sizeof( *w ) );
        NSLog( @"Int32 Memmove Reverse:%zd", wTimer.Nano() );
        for ( int32_t i = 1; i <= wNum; i++ ) assert( w[ i ] == i - 1 );
        delete[] w;
    }
    {
        float* w = new float[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = static_cast< float >( i );
        Timer wTimer;
        memmove( w, w + 1, wNum * sizeof( *w ) );
        NSLog( @"Float Memmove Forward:%zd", wTimer.Nano() );
        for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == static_cast< float >( i + 1 ) );
        delete[] w;
    }
    {
        float* w = new float[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = static_cast< float >( i );
        Timer wTimer;
        memmove( w + 1, w, wNum * sizeof( *w ) );
        NSLog( @"Float Memmove Reverse:%zd", wTimer.Nano() );
        for ( int32_t i = 1; i <= wNum; i++ ) assert( w[ i ] == static_cast< float >( i - 1 ) );
        delete[] w;
    }

    // DSP: "add scalar zero" used as a copy.
    // NOTE(review): both cases read from w + 1 and write to w, i.e. they shift in
    // the same direction as the "Forward" cases above even though the log label
    // says "Rev". The label is kept so existing logs/tables still match.
    // NOTE(review): input and output partially overlap here; vDSP may not
    // guarantee correct results for that — confirm against the Accelerate docs.
    {
        int* w = new int[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = i;
        Timer wTimer;
        int wZero = 0;
        vDSP_vsaddi( w + 1, 1, &wZero, w, 1, wNum );
        NSLog( @"Int32 vDSP_vsaddi Rev:%zd", wTimer.Nano() );
        for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == i + 1 );
        delete[] w;
    }
    {
        float* w = new float[ wNum + 1 ];
        for ( int32_t i = 0; i <= wNum; i++ ) w[ i ] = static_cast< float >( i );
        Timer wTimer;
        float wZero = 0;
        vDSP_vsadd( w + 1, 1, &wZero, w, 1, wNum );
        NSLog( @"Float vDSP_vsadd Rev:%zd", wTimer.Nano() );
        for ( int32_t i = 0; i < wNum; i++ ) assert( w[ i ] == static_cast< float >( i + 1 ) );
        delete[] w;
    }
}
102