Delphi SSE优化算法应用之三(CRC-32C (Castagnoli) 校验算法优化)
作者:CodeGame
.
CRC-32C (Castagnoli) 算法是 iSCSI 和 SCTP 使用的数据校验算法。它与常用的 CRC-32 (IEEE 802.3) 算法的区别只在于生成多项式常数:CRC-32C 是 0x1EDC6F41,CRC-32 是 0x04C11DB7。也就是说,除了由多项式生成的 CRC 表不同之外,两者的计算过程完全一样。
CRC32C 常规(查表)算法如下:
// Computes the CRC-32C (Castagnoli) checksum of a buffer with a 256-entry
// lookup table, one byte per iteration.
//   Data    - pointer to the first byte of the buffer.
//   aLength - number of bytes to process; zero or negative processes nothing.
// Returns the final (bit-inverted) CRC value. An empty input, or a nil Data
// pointer, yields 0, which matches what _CRC32CSSE returns for a nil buffer.
function _CRC32CX86(Data: PByte; aLength: Integer): DWORD;
const
  _CRC32CTable: array[0..255] of DWORD = (
    $00000000, $F26B8303, $E13B70F7, $1350F3F4, //CRC32C Table
    $C79A971F, $35F1141C, $26A1E7E8, $D4CA64EB,
    $8AD958CF, $78B2DBCC, $6BE22838, $9989AB3B,
    $4D43CFD0, $BF284CD3, $AC78BF27, $5E133C24,
    $105EC76F, $E235446C, $F165B798, $030E349B,
    $D7C45070, $25AFD373, $36FF2087, $C494A384,
    $9A879FA0, $68EC1CA3, $7BBCEF57, $89D76C54,
    $5D1D08BF, $AF768BBC, $BC267848, $4E4DFB4B,
    $20BD8EDE, $D2D60DDD, $C186FE29, $33ED7D2A,
    $E72719C1, $154C9AC2, $061C6936, $F477EA35,
    $AA64D611, $580F5512, $4B5FA6E6, $B93425E5,
    $6DFE410E, $9F95C20D, $8CC531F9, $7EAEB2FA,
    $30E349B1, $C288CAB2, $D1D83946, $23B3BA45,
    $F779DEAE, $05125DAD, $1642AE59, $E4292D5A,
    $BA3A117E, $4851927D, $5B016189, $A96AE28A,
    $7DA08661, $8FCB0562, $9C9BF696, $6EF07595,
    $417B1DBC, $B3109EBF, $A0406D4B, $522BEE48,
    $86E18AA3, $748A09A0, $67DAFA54, $95B17957,
    $CBA24573, $39C9C670, $2A993584, $D8F2B687,
    $0C38D26C, $FE53516F, $ED03A29B, $1F682198,
    $5125DAD3, $A34E59D0, $B01EAA24, $42752927,
    $96BF4DCC, $64D4CECF, $77843D3B, $85EFBE38,
    $DBFC821C, $2997011F, $3AC7F2EB, $C8AC71E8,
    $1C661503, $EE0D9600, $FD5D65F4, $0F36E6F7,
    $61C69362, $93AD1061, $80FDE395, $72966096,
    $A65C047D, $5437877E, $4767748A, $B50CF789,
    $EB1FCBAD, $197448AE, $0A24BB5A, $F84F3859,
    $2C855CB2, $DEEEDFB1, $CDBE2C45, $3FD5AF46,
    $7198540D, $83F3D70E, $90A324FA, $62C8A7F9,
    $B602C312, $44694011, $5739B3E5, $A55230E6,
    $FB410CC2, $092A8FC1, $1A7A7C35, $E811FF36,
    $3CDB9BDD, $CEB018DE, $DDE0EB2A, $2F8B6829,
    $82F63B78, $709DB87B, $63CD4B8F, $91A6C88C,
    $456CAC67, $B7072F64, $A457DC90, $563C5F93,
    $082F63B7, $FA44E0B4, $E9141340, $1B7F9043,
    $CFB5F4A8, $3DDE77AB, $2E8E845F, $DCE5075C,
    $92A8FC17, $60C37F14, $73938CE0, $81F80FE3,
    $55326B08, $A759E80B, $B4091BFF, $466298FC,
    $1871A4D8, $EA1A27DB, $F94AD42F, $0B21572C,
    $DFEB33C7, $2D80B0C4, $3ED04330, $CCBBC033,
    $A24BB5A6, $502036A5, $4370C551, $B11B4652,
    $65D122B9, $97BAA1BA, $84EA524E, $7681D14D,
    $2892ED69, $DAF96E6A, $C9A99D9E, $3BC21E9D,
    $EF087A76, $1D63F975, $0E330A81, $FC588982,
    $B21572C9, $407EF1CA, $532E023E, $A145813D,
    $758FE5D6, $87E466D5, $94B49521, $66DF1622,
    $38CC2A06, $CAA7A905, $D9F75AF1, $2B9CD9F2,
    $FF56BD19, $0D3D3E1A, $1E6DCDEE, $EC064EED,
    $C38D26C4, $31E6A5C7, $22B65633, $D0DDD530,
    $0417B1DB, $F67C32D8, $E52CC12C, $1747422F,
    $49547E0B, $BB3FFD08, $A86F0EFC, $5A048DFF,
    $8ECEE914, $7CA56A17, $6FF599E3, $9D9E1AE0,
    $D3D3E1AB, $21B862A8, $32E8915C, $C083125F,
    $144976B4, $E622F5B7, $F5720643, $07198540,
    $590AB964, $AB613A67, $B831C993, $4A5A4A90,
    $9E902E7B, $6CFBAD78, $7FAB5E8C, $8DC0DD8F,
    $E330A81A, $115B2B19, $020BD8ED, $F0605BEE,
    $24AA3F05, $D6C1BC06, $C5914FF2, $37FACCF1,
    $69E9F0D5, $9B8273D6, $88D28022, $7AB90321,
    $AE7367CA, $5C18E4C9, $4F48173D, $BD23943E,
    $F36E6F75, $0105EC76, $12551F82, $E03E9C81,
    $34F4F86A, $C69F7B69, $D5CF889D, $27A40B9E,
    $79B737BA, $8BDCB4B9, $988C474D, $6AE7C44E,
    $BE2DA0A5, $4C4623A6, $5F16D052, $AD7D5351);
var
  I: Integer;
begin
  Result := $FFFFFFFF; // initial CRC value
  // Skip the loop for a nil buffer instead of dereferencing it.
  if Data <> nil then
    for I := 0 to aLength - 1 do
    begin
      // Fold the next input byte into the low byte of the running CRC and
      // replace that byte with the precomputed table entry.
      Result := (Result shr 8) xor _CRC32CTable[(Result and $FF) xor Data^];
      Inc(Data);
    end;
  Result := not Result; // final bit inversion
end;
CRC32C使用SSE4.2硬件指令优化算法部分代码如下:
// Computes the CRC-32C (Castagnoli) checksum of a buffer with the SSE4.2
// CRC32 instruction. Whole DWORDs are consumed first, then the remaining
// 0..3 tail bytes one at a time, so the result covers every input byte and
// agrees with the table-driven _CRC32CX86 for any length.
//   Data    - pointer to the first byte of the buffer (arrives in EAX).
//   aLength - number of bytes to process (arrives in EDX).
// Returns the final (bit-inverted) CRC in EAX. A nil buffer or a length
// <= 0 yields 0.
// NOTE: this routine performs no CPUID check; the caller must make sure the
// CPU supports SSE4.2 before calling it.
function _CRC32CSSE(Data: PByte; aLength: Integer): DWORD;
asm
        // 32-bit register convention: EAX, EDX and ECX may be clobbered;
        // ESI must be preserved for the caller.
        push    esi
        mov     esi, eax                // ESI -> current input position
        mov     eax, $FFFFFFFF          // initial CRC value
        test    esi, esi
        jz      @Done                   // nil buffer: nothing to hash
        test    edx, edx
        jle     @Done                   // zero or negative length
        mov     ecx, edx
        shr     ecx, 2                  // ECX = number of whole DWORDs
        jz      @Tail                   // fewer than 4 bytes: bytes only
@DWordLoop:
        crc32   eax, dword ptr [esi]    // fold 4 bytes per iteration
        add     esi, 4
        dec     ecx
        jnz     @DWordLoop
@Tail:
        and     edx, 3                  // EDX = remaining 0..3 bytes
        jz      @Done
@ByteLoop:
        crc32   eax, byte ptr [esi]     // fold one trailing byte
        inc     esi
        dec     edx
        jnz     @ByteLoop
@Done:
        not     eax                     // final bit inversion
        pop     esi
end;
以上2个不同实现方式在Intel Core i7 720QM 1.60GHz CPU上测试成绩如下:
(数据采用随机算法生成,1M*100表示使用1M数据进行100次重复计算,数据量相当于100M)
--------------------------------------------------------------------------------------
| 数据量 | 常规算法时间 | 优化算法时间 | 速度比(常规/优化) |
--------------------------------------------------------------------------------------
| 1M *100 | X86 Time:390ms | SSE Time:32ms | 1218% |
--------------------------------------------------------------------------------------
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论