@@ -28,8 +28,10 @@
 // compromising on hash quality.

 #include <algorithm>
-#include <string.h> // for memcpy and memset
-#include "cityhash.h"
+#include <cstring>
+#include <utility>
+
+#include "common/cityhash.h"
 #include "common/swap.h"

 // #include "config.h"
@@ -42,21 +44,17 @@
 using namespace std;

-typedef uint8_t uint8;
-typedef uint32_t uint32;
-typedef uint64_t uint64;
-
 namespace Common {

-static uint64 UNALIGNED_LOAD64(const char* p) {
-    uint64 result;
-    memcpy(&result, p, sizeof(result));
+static u64 unaligned_load64(const char* p) {
+    u64 result;
+    std::memcpy(&result, p, sizeof(result));
     return result;
 }

-static uint32 UNALIGNED_LOAD32(const char* p) {
-    uint32 result;
-    memcpy(&result, p, sizeof(result));
+static u32 unaligned_load32(const char* p) {
+    u32 result;
+    std::memcpy(&result, p, sizeof(result));
     return result;
 }
@@ -76,64 +74,64 @@ static uint32 UNALIGNED_LOAD32(const char* p) {
 #endif
 #endif

-static uint64 Fetch64(const char* p) {
-    return uint64_in_expected_order(UNALIGNED_LOAD64(p));
+static u64 Fetch64(const char* p) {
+    return uint64_in_expected_order(unaligned_load64(p));
 }

-static uint32 Fetch32(const char* p) {
-    return uint32_in_expected_order(UNALIGNED_LOAD32(p));
+static u32 Fetch32(const char* p) {
+    return uint32_in_expected_order(unaligned_load32(p));
 }

 // Some primes between 2^63 and 2^64 for various uses.
-static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
-static const uint64 k1 = 0xb492b66fbe98f273ULL;
-static const uint64 k2 = 0x9ae16a3b2f90404fULL;
+static constexpr u64 k0 = 0xc3a5c85c97cb3127ULL;
+static constexpr u64 k1 = 0xb492b66fbe98f273ULL;
+static constexpr u64 k2 = 0x9ae16a3b2f90404fULL;

 // Bitwise right rotate. Normally this will compile to a single
 // instruction, especially if the shift is a manifest constant.
-static uint64 Rotate(uint64 val, int shift) {
+static u64 Rotate(u64 val, int shift) {
     // Avoid shifting by 64: doing so yields an undefined result.
     return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
 }

-static uint64 ShiftMix(uint64 val) {
+static u64 ShiftMix(u64 val) {
     return val ^ (val >> 47);
 }

-static uint64 HashLen16(uint64 u, uint64 v) {
-    return Hash128to64(uint128(u, v));
+static u64 HashLen16(u64 u, u64 v) {
+    return Hash128to64(u128{u, v});
 }

-static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
+static u64 HashLen16(u64 u, u64 v, u64 mul) {
     // Murmur-inspired hashing.
-    uint64 a = (u ^ v) * mul;
+    u64 a = (u ^ v) * mul;
     a ^= (a >> 47);
-    uint64 b = (v ^ a) * mul;
+    u64 b = (v ^ a) * mul;
     b ^= (b >> 47);
     b *= mul;
     return b;
 }

-static uint64 HashLen0to16(const char* s, std::size_t len) {
+static u64 HashLen0to16(const char* s, size_t len) {
     if (len >= 8) {
-        uint64 mul = k2 + len * 2;
-        uint64 a = Fetch64(s) + k2;
-        uint64 b = Fetch64(s + len - 8);
-        uint64 c = Rotate(b, 37) * mul + a;
-        uint64 d = (Rotate(a, 25) + b) * mul;
+        u64 mul = k2 + len * 2;
+        u64 a = Fetch64(s) + k2;
+        u64 b = Fetch64(s + len - 8);
+        u64 c = Rotate(b, 37) * mul + a;
+        u64 d = (Rotate(a, 25) + b) * mul;
         return HashLen16(c, d, mul);
     }
     if (len >= 4) {
-        uint64 mul = k2 + len * 2;
-        uint64 a = Fetch32(s);
+        u64 mul = k2 + len * 2;
+        u64 a = Fetch32(s);
         return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
     }
     if (len > 0) {
-        uint8 a = s[0];
-        uint8 b = s[len >> 1];
-        uint8 c = s[len - 1];
-        uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
-        uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2);
+        u8 a = s[0];
+        u8 b = s[len >> 1];
+        u8 c = s[len - 1];
+        u32 y = static_cast<u32>(a) + (static_cast<u32>(b) << 8);
+        u32 z = static_cast<u32>(len) + (static_cast<u32>(c) << 2);
         return ShiftMix(y * k2 ^ z * k0) * k2;
     }
     return k2;
@@ -141,22 +139,21 @@ static uint64 HashLen0to16(const char* s, std::size_t len) {
 // This probably works well for 16-byte strings as well, but it may be overkill
 // in that case.
-static uint64 HashLen17to32(const char* s, std::size_t len) {
-    uint64 mul = k2 + len * 2;
-    uint64 a = Fetch64(s) * k1;
-    uint64 b = Fetch64(s + 8);
-    uint64 c = Fetch64(s + len - 8) * mul;
-    uint64 d = Fetch64(s + len - 16) * k2;
+static u64 HashLen17to32(const char* s, size_t len) {
+    u64 mul = k2 + len * 2;
+    u64 a = Fetch64(s) * k1;
+    u64 b = Fetch64(s + 8);
+    u64 c = Fetch64(s + len - 8) * mul;
+    u64 d = Fetch64(s + len - 16) * k2;
     return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul);
 }

 // Return a 16-byte hash for 48 bytes. Quick and dirty.
 // Callers do best to use "random-looking" values for a and b.
-static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a,
-                                                   uint64 b) {
+static pair<u64, u64> WeakHashLen32WithSeeds(u64 w, u64 x, u64 y, u64 z, u64 a, u64 b) {
     a += w;
     b = Rotate(b + a + z, 21);
-    uint64 c = a;
+    u64 c = a;
     a += x;
     a += y;
     b += Rotate(a, 44);
@@ -164,34 +161,34 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y,
 }

 // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
-static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) {
+static pair<u64, u64> WeakHashLen32WithSeeds(const char* s, u64 a, u64 b) {
     return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a,
                                   b);
 }

 // Return an 8-byte hash for 33 to 64 bytes.
-static uint64 HashLen33to64(const char* s, std::size_t len) {
-    uint64 mul = k2 + len * 2;
-    uint64 a = Fetch64(s) * k2;
-    uint64 b = Fetch64(s + 8);
-    uint64 c = Fetch64(s + len - 24);
-    uint64 d = Fetch64(s + len - 32);
-    uint64 e = Fetch64(s + 16) * k2;
-    uint64 f = Fetch64(s + 24) * 9;
-    uint64 g = Fetch64(s + len - 8);
-    uint64 h = Fetch64(s + len - 16) * mul;
-    uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
-    uint64 v = ((a + g) ^ d) + f + 1;
-    uint64 w = swap64((u + v) * mul) + h;
-    uint64 x = Rotate(e + f, 42) + c;
-    uint64 y = (swap64((v + w) * mul) + g) * mul;
-    uint64 z = e + f + c;
+static u64 HashLen33to64(const char* s, size_t len) {
+    u64 mul = k2 + len * 2;
+    u64 a = Fetch64(s) * k2;
+    u64 b = Fetch64(s + 8);
+    u64 c = Fetch64(s + len - 24);
+    u64 d = Fetch64(s + len - 32);
+    u64 e = Fetch64(s + 16) * k2;
+    u64 f = Fetch64(s + 24) * 9;
+    u64 g = Fetch64(s + len - 8);
+    u64 h = Fetch64(s + len - 16) * mul;
+    u64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
+    u64 v = ((a + g) ^ d) + f + 1;
+    u64 w = swap64((u + v) * mul) + h;
+    u64 x = Rotate(e + f, 42) + c;
+    u64 y = (swap64((v + w) * mul) + g) * mul;
+    u64 z = e + f + c;
     a = swap64((x + z) * mul + y) + b;
     b = ShiftMix((z + a) * mul + d + h) * mul;
     return b + x;
 }

-uint64 CityHash64(const char* s, std::size_t len) {
+u64 CityHash64(const char* s, size_t len) {
     if (len <= 32) {
         if (len <= 16) {
             return HashLen0to16(s, len);
@@ -204,15 +201,15 @@ uint64 CityHash64(const char* s, std::size_t len) {
     // For strings over 64 bytes we hash the end first, and then as we
     // loop we keep 56 bytes of state: v, w, x, y, and z.
-    uint64 x = Fetch64(s + len - 40);
-    uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
-    uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
-    pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
-    pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
+    u64 x = Fetch64(s + len - 40);
+    u64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
+    u64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
+    pair<u64, u64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
+    pair<u64, u64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
     x = x * k1 + Fetch64(s);

     // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
-    len = (len - 1) & ~static_cast<std::size_t>(63);
+    len = (len - 1) & ~static_cast<size_t>(63);
     do {
         x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
         y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
@@ -229,21 +226,21 @@ uint64 CityHash64(const char* s, std::size_t len) {
                      HashLen16(v.second, w.second) + x);
 }

-uint64 CityHash64WithSeed(const char* s, std::size_t len, uint64 seed) {
+u64 CityHash64WithSeed(const char* s, size_t len, u64 seed) {
     return CityHash64WithSeeds(s, len, k2, seed);
 }

-uint64 CityHash64WithSeeds(const char* s, std::size_t len, uint64 seed0, uint64 seed1) {
+u64 CityHash64WithSeeds(const char* s, size_t len, u64 seed0, u64 seed1) {
     return HashLen16(CityHash64(s, len) - seed0, seed1);
 }

 // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
 // of any length representable in signed long. Based on City and Murmur.
-static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) {
-    uint64 a = Uint128Low64(seed);
-    uint64 b = Uint128High64(seed);
-    uint64 c = 0;
-    uint64 d = 0;
+static u128 CityMurmur(const char* s, size_t len, u128 seed) {
+    u64 a = seed[0];
+    u64 b = seed[1];
+    u64 c = 0;
+    u64 d = 0;
     signed long l = static_cast<long>(len) - 16;
     if (l <= 0) { // len <= 16
         a = ShiftMix(a * k1) * k1;
@@ -266,20 +263,20 @@ static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) {
     }
     a = HashLen16(a, c);
     b = HashLen16(d, b);
-    return uint128(a ^ b, HashLen16(b, a));
+    return u128{a ^ b, HashLen16(b, a)};
 }

-uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
+u128 CityHash128WithSeed(const char* s, size_t len, u128 seed) {
     if (len < 128) {
         return CityMurmur(s, len, seed);
     }

     // We expect len >= 128 to be the common case. Keep 56 bytes of state:
     // v, w, x, y, and z.
-    pair<uint64, uint64> v, w;
-    uint64 x = Uint128Low64(seed);
-    uint64 y = Uint128High64(seed);
-    uint64 z = len * k1;
+    pair<u64, u64> v, w;
+    u64 x = seed[0];
+    u64 y = seed[1];
+    u64 z = len * k1;
     v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
     v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
     w.first = Rotate(y + z, 35) * k1 + x;
@@ -313,7 +310,7 @@ uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
     w.first *= 9;
     v.first *= k0;
     // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
-    for (std::size_t tail_done = 0; tail_done < len;) {
+    for (size_t tail_done = 0; tail_done < len;) {
         tail_done += 32;
         y = Rotate(x + y, 42) * k0 + v.second;
         w.first += Fetch64(s + len - tail_done + 16);
@@ -328,13 +325,12 @@ uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
     // different 56-byte-to-8-byte hashes to get a 16-byte final result.
     x = HashLen16(x, v.first);
     y = HashLen16(y + z, w.first);
-    return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second));
+    return u128{HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)};
 }

-uint128 CityHash128(const char* s, std::size_t len) {
-    return len >= 16
-               ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0))
-               : CityHash128WithSeed(s, len, uint128(k0, k1));
+u128 CityHash128(const char* s, size_t len) {
+    return len >= 16 ? CityHash128WithSeed(s + 16, len - 16, u128{Fetch64(s), Fetch64(s + 8) + k0})
+                     : CityHash128WithSeed(s, len, u128{k0, k1});
 }

 } // namespace Common
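
For reference, a minimal usage sketch of the renamed API after this change. It assumes the `u64` alias is provided via `common/common_types.h` (not shown in these hunks), and `HashName` is a hypothetical caller, not part of the patch:

// Hypothetical caller for illustration only; not part of the diff above.
#include <string>

#include "common/cityhash.h"

// CityHash64 only needs a pointer and a byte count, so any contiguous buffer works.
u64 HashName(const std::string& name) {
    return Common::CityHash64(name.data(), name.size());
}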