Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_crc32c_sse42.c
4 : * Compute CRC-32C checksum using Intel SSE 4.2 instructions.
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/port/pg_crc32c_sse42.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "c.h"
16 :
17 : #include "port/pg_crc32c.h"
18 :
19 : #include <nmmintrin.h>
20 :
21 : pg_crc32c
22 6320495 : pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
23 : {
24 6320495 : const unsigned char *p = data;
25 6320495 : const unsigned char *pend = p + len;
26 :
27 : /*
28 : * Process eight bytes of data at a time.
29 : *
30 : * NB: We do unaligned accesses here. The Intel architecture allows that,
31 : * and performance testing didn't show any performance gain from aligning
32 : * the begin address.
33 : */
34 : #ifdef __x86_64__
35 : while (p + 8 <= pend)
36 : {
37 : crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
38 : p += 8;
39 : }
40 :
41 : /* Process remaining full four bytes if any */
42 : if (p + 4 <= pend)
43 : {
44 : crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
45 : p += 4;
46 : }
47 : #else
48 :
49 : /*
50 : * Process four bytes at a time. (The eight byte instruction is not
51 : * available on the 32-bit x86 architecture).
52 : */
53 54752882 : while (p + 4 <= pend)
54 : {
55 84223784 : crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
56 42111892 : p += 4;
57 : }
58 : #endif /* __x86_64__ */
59 :
60 : /* Process any remaining bytes one at a time. */
61 19606382 : while (p < pend)
62 : {
63 13930784 : crc = _mm_crc32_u8(crc, *p);
64 6965392 : p++;
65 : }
66 :
67 6320495 : return crc;
68 : }
|