cityhash.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. /*
  2. * Go implementation of Google city hash (MIT license)
  3. * https://code.google.com/p/cityhash/
  4. *
  5. * MIT License http://www.opensource.org/licenses/mit-license.php
  6. *
  7. * I don't even want to pretend to understand the details of city hash.
  8. * I am only reproducing the logic in Go as faithfully as I can.
  9. *
  10. */
  11. package cityhash
  12. import (
  13. "encoding/binary"
  14. "unsafe"
  15. )
  16. /*
  17. var (
  18. little bool
  19. )
  20. func init() {
  21. if IsLittleEndian() {
  22. little = true
  23. } else {
  24. little = false
  25. }
  26. }
  27. */
  28. func IsLittleEndian() bool {
  29. var i int32 = 0x01020304
  30. u := unsafe.Pointer(&i)
  31. pb := (*byte)(u)
  32. b := *pb
  33. return (b == 0x04)
  34. }
  35. func unalignedLoad64(p []byte) (result uint64) {
  36. return binary.LittleEndian.Uint64(p)
  37. /*
  38. if little {
  39. result = binary.LittleEndian.Uint64(p)
  40. } else {
  41. result = binary.BigEndian.Uint64(p)
  42. }
  43. return result
  44. */
  45. }
  46. func unalignedLoad32(p []byte) (result uint32) {
  47. return binary.LittleEndian.Uint32(p)
  48. /*
  49. if little {
  50. result = binary.LittleEndian.Uint32(p)
  51. } else {
  52. result = binary.BigEndian.Uint32(p)
  53. }
  54. return result
  55. */
  56. }
  57. func bswap64(x uint64) uint64 {
  58. // Copied from netbsd's bswap64.c
  59. return ((x << 56) & 0xff00000000000000) |
  60. ((x << 40) & 0x00ff000000000000) |
  61. ((x << 24) & 0x0000ff0000000000) |
  62. ((x << 8) & 0x000000ff00000000) |
  63. ((x >> 8) & 0x00000000ff000000) |
  64. ((x >> 24) & 0x0000000000ff0000) |
  65. ((x >> 40) & 0x000000000000ff00) |
  66. ((x >> 56) & 0x00000000000000ff)
  67. }
  68. func bswap32(x uint32) uint32 {
  69. // Copied from netbsd's bswap32.c
  70. return ((x << 24) & 0xff000000) |
  71. ((x << 8) & 0x00ff0000) |
  72. ((x >> 8) & 0x0000ff00) |
  73. ((x >> 24) & 0x000000ff)
  74. }
  75. func uint32InExpectedOrder(x uint32) uint32 {
  76. /*
  77. if !little {
  78. return bswap32(x)
  79. }
  80. */
  81. return x
  82. }
  83. func uint64InExpectedOrder(x uint64) uint64 {
  84. /*
  85. if !little {
  86. return bswap64(x)
  87. }
  88. */
  89. return x
  90. }
  91. // If I understand the original code correctly, it's expecting to load either 8 or 4
  92. // byes in little endian order. so let's just simplify it a bit since we will do that
  93. // anyway..
  94. // https://code.google.com/p/cityhash/source/browse/trunk/src/city.cc#112
  95. func fetch64(p []byte) uint64 {
  96. return binary.LittleEndian.Uint64(p)
  97. //return uint64InExpectedOrder(unalignedLoad64(p))
  98. }
  99. func fetch32(p []byte) uint32 {
  100. return binary.LittleEndian.Uint32(p)
  101. //return uint32InExpectedOrder(unalignedLoad32(p))
  102. }
  103. const (
  104. k0 uint64 = 0xc3a5c85c97cb3127
  105. k1 uint64 = 0xb492b66fbe98f273
  106. k2 uint64 = 0x9ae16a3b2f90404f
  107. c1 uint32 = 0xcc9e2d51
  108. c2 uint32 = 0x1b873593
  109. )
  110. func fmix(h uint32) uint32 {
  111. h ^= h >> 16
  112. h *= 0x85ebca6b
  113. h ^= h >> 13
  114. h *= 0xc2b2ae35
  115. h ^= h >> 16
  116. return h
  117. }
  118. func rotate64(val uint64, shift uint32) uint64 {
  119. // Avoid shifting by 64: doing so yields an undefined result.
  120. if shift != 0 {
  121. return ((val >> shift) | (val << (64 - shift)))
  122. }
  123. return val
  124. }
  125. func rotate32(val uint32, shift uint32) uint32 {
  126. // Avoid shifting by 32: doing so yields an undefined result.
  127. if shift != 0 {
  128. return ((val >> shift) | (val << (32 - shift)))
  129. }
  130. return val
  131. }
  132. func swap64(a, b *uint64) {
  133. *a, *b = *b, *a
  134. }
  135. func swap32(a, b *uint32) {
  136. *a, *b = *b, *a
  137. }
  138. func permute3(a, b, c *uint32) {
  139. swap32(a, b)
  140. swap32(a, c)
  141. }
  142. func mur(a, h uint32) uint32 {
  143. a *= c1
  144. a = rotate32(a, 17)
  145. a *= c2
  146. h ^= a
  147. h = rotate32(h, 19)
  148. //return h * 5 + 0xe6546b64
  149. z := h*5 + 0xe6546b64
  150. return z
  151. }
  152. func hash32Len13to24(s []byte, length uint32) uint32 {
  153. var a uint32 = fetch32(s[(length>>1)-4:])
  154. var b uint32 = fetch32(s[4:])
  155. var c uint32 = fetch32(s[length-8:])
  156. var d uint32 = fetch32(s[(length >> 1):])
  157. var e uint32 = fetch32(s)
  158. var f uint32 = fetch32(s[length-4:])
  159. var h uint32 = length
  160. return fmix(mur(f, mur(e, mur(d, mur(c, mur(b, mur(a, h)))))))
  161. }
  162. func hash32Len0to4(s []byte, length uint32) uint32 {
  163. var b, c uint32 = 0, 9
  164. tmp := s[:length]
  165. for _, v := range tmp {
  166. b = uint32(int64(b)*int64(c1) + int64(int8(v)))
  167. c ^= b
  168. }
  169. return fmix(mur(b, mur(length, c)))
  170. }
  171. func hash32Len5to12(s []byte, length uint32) uint32 {
  172. var a, b, c uint32 = length, length * 5, 9
  173. var d uint32 = b
  174. a += fetch32(s)
  175. b += fetch32(s[length-4:])
  176. c += fetch32(s[((length >> 1) & 4):])
  177. return fmix(mur(c, mur(b, mur(a, d))))
  178. }
  179. func CityHash32(s []byte, length uint32) uint32 {
  180. if length <= 4 {
  181. return hash32Len0to4(s, length)
  182. } else if length <= 12 {
  183. return hash32Len5to12(s, length)
  184. } else if length <= 24 {
  185. return hash32Len13to24(s, length)
  186. }
  187. // length > 24
  188. var h uint32 = length
  189. var g uint32 = c1 * length
  190. var f uint32 = g
  191. var a0 uint32 = rotate32(fetch32(s[length-4:])*c1, 17) * c2
  192. var a1 uint32 = rotate32(fetch32(s[length-8:])*c1, 17) * c2
  193. var a2 uint32 = rotate32(fetch32(s[length-16:])*c1, 17) * c2
  194. var a3 uint32 = rotate32(fetch32(s[length-12:])*c1, 17) * c2
  195. var a4 uint32 = rotate32(fetch32(s[length-20:])*c1, 17) * c2
  196. h ^= a0
  197. h = rotate32(h, 19)
  198. h = h*5 + 0xe6546b64
  199. h ^= a2
  200. h = rotate32(h, 19)
  201. h = h*5 + 0xe6546b64
  202. g ^= a1
  203. g = rotate32(g, 19)
  204. g = g*5 + 0xe6546b64
  205. g ^= a3
  206. g = rotate32(g, 19)
  207. g = g*5 + 0xe6546b64
  208. f += a4
  209. f = rotate32(f, 19)
  210. f = f*5 + 0xe6546b64
  211. var iters uint32 = (length - 1) / 20
  212. for {
  213. var a0 uint32 = rotate32(fetch32(s)*c1, 17) * c2
  214. var a1 uint32 = fetch32(s[4:])
  215. var a2 uint32 = rotate32(fetch32(s[8:])*c1, 17) * c2
  216. var a3 uint32 = rotate32(fetch32(s[12:])*c1, 17) * c2
  217. var a4 uint32 = fetch32(s[16:])
  218. h ^= a0
  219. h = rotate32(h, 18)
  220. h = h*5 + 0xe6546b64
  221. f += a1
  222. f = rotate32(f, 19)
  223. f = f * c1
  224. g += a2
  225. g = rotate32(g, 18)
  226. g = g*5 + 0xe6546b64
  227. h ^= a3 + a1
  228. h = rotate32(h, 19)
  229. h = h*5 + 0xe6546b64
  230. g ^= a4
  231. g = bswap32(g) * 5
  232. h += a4 * 5
  233. h = bswap32(h)
  234. f += a0
  235. permute3(&f, &h, &g)
  236. s = s[20:]
  237. iters--
  238. if iters == 0 {
  239. break
  240. }
  241. }
  242. g = rotate32(g, 11) * c1
  243. g = rotate32(g, 17) * c1
  244. f = rotate32(f, 11) * c1
  245. f = rotate32(f, 17) * c1
  246. h = rotate32(h+g, 19)
  247. h = h*5 + 0xe6546b64
  248. h = rotate32(h, 17) * c1
  249. h = rotate32(h+f, 19)
  250. h = h*5 + 0xe6546b64
  251. h = rotate32(h, 17) * c1
  252. return h
  253. }
  254. func shiftMix(val uint64) uint64 {
  255. return val ^ (val >> 47)
  256. }
  257. type Uint128 [2]uint64
  258. func (this *Uint128) setLower64(l uint64) {
  259. this[0] = l
  260. }
  261. func (this *Uint128) setHigher64(h uint64) {
  262. this[1] = h
  263. }
  264. func (this Uint128) Lower64() uint64 {
  265. return this[0]
  266. }
  267. func (this Uint128) Higher64() uint64 {
  268. return this[1]
  269. }
  270. func (this Uint128) Bytes() []byte {
  271. b := make([]byte, 16)
  272. binary.LittleEndian.PutUint64(b, this[0])
  273. binary.LittleEndian.PutUint64(b[8:], this[1])
  274. return b
  275. }
  276. func hash128to64(x Uint128) uint64 {
  277. // Murmur-inspired hashing.
  278. const kMul uint64 = 0x9ddfea08eb382d69
  279. var a uint64 = (x.Lower64() ^ x.Higher64()) * kMul
  280. a ^= (a >> 47)
  281. var b uint64 = (x.Higher64() ^ a) * kMul
  282. b ^= (b >> 47)
  283. b *= kMul
  284. return b
  285. }
  286. func hashLen16(u, v uint64) uint64 {
  287. return hash128to64(Uint128{u, v})
  288. }
  289. func hashLen16_3(u, v, mul uint64) uint64 {
  290. // Murmur-inspired hashing.
  291. var a uint64 = (u ^ v) * mul
  292. a ^= (a >> 47)
  293. var b uint64 = (v ^ a) * mul
  294. b ^= (b >> 47)
  295. b *= mul
  296. return b
  297. }
  298. func hashLen0to16(s []byte, length uint32) uint64 {
  299. if length >= 8 {
  300. var mul uint64 = k2 + uint64(length)*2
  301. var a uint64 = fetch64(s) + k2
  302. var b uint64 = fetch64(s[length-8:])
  303. var c uint64 = rotate64(b, 37)*mul + a
  304. var d uint64 = (rotate64(a, 25) + b) * mul
  305. return hashLen16_3(c, d, mul)
  306. }
  307. if length >= 4 {
  308. var mul uint64 = k2 + uint64(length)*2
  309. var a uint64 = uint64(fetch32(s))
  310. return hashLen16_3(uint64(length)+(a<<3), uint64(fetch32(s[length-4:])), mul)
  311. }
  312. if length > 0 {
  313. var a uint8 = uint8(s[0])
  314. var b uint8 = uint8(s[length>>1])
  315. var c uint8 = uint8(s[length-1])
  316. var y uint32 = uint32(a) + (uint32(b) << 8)
  317. var z uint32 = length + (uint32(c) << 2)
  318. return shiftMix(uint64(y)*k2^uint64(z)*k0) * k2
  319. }
  320. return k2
  321. }
  322. func hashLen17to32(s []byte, length uint32) uint64 {
  323. var mul uint64 = k2 + uint64(length)*2
  324. var a uint64 = fetch64(s) * k1
  325. var b uint64 = fetch64(s[8:])
  326. var c uint64 = fetch64(s[length-8:]) * mul
  327. var d uint64 = fetch64(s[length-16:]) * k2
  328. return hashLen16_3(rotate64(a+b, 43)+rotate64(c, 30)+d, a+rotate64(b+k2, 18)+c, mul)
  329. }
  330. func weakHashLen32WithSeeds(w, x, y, z, a, b uint64) Uint128 {
  331. a += w
  332. b = rotate64(b+a+z, 21)
  333. var c uint64 = a
  334. a += x
  335. a += y
  336. b += rotate64(a, 44)
  337. return Uint128{a + z, b + c}
  338. }
  339. func weakHashLen32WithSeeds_3(s []byte, a, b uint64) Uint128 {
  340. return weakHashLen32WithSeeds(fetch64(s), fetch64(s[8:]), fetch64(s[16:]), fetch64(s[24:]), a, b)
  341. }
  342. func hashLen33to64(s []byte, length uint32) uint64 {
  343. var mul uint64 = k2 + uint64(length)*2
  344. var a uint64 = fetch64(s) * k2
  345. var b uint64 = fetch64(s[8:])
  346. var c uint64 = fetch64(s[length-24:])
  347. var d uint64 = fetch64(s[length-32:])
  348. var e uint64 = fetch64(s[16:]) * k2
  349. var f uint64 = fetch64(s[24:]) * 9
  350. var g uint64 = fetch64(s[length-8:])
  351. var h uint64 = fetch64(s[length-16:]) * mul
  352. var u uint64 = rotate64(a+g, 43) + (rotate64(b, 30)+c)*9
  353. var v uint64 = ((a + g) ^ d) + f + 1
  354. var w uint64 = bswap64((u+v)*mul) + h
  355. var x uint64 = rotate64(e+f, 42) + c
  356. var y uint64 = (bswap64((v+w)*mul) + g) * mul
  357. var z uint64 = e + f + c
  358. a = bswap64((x+z)*mul+y) + b
  359. b = shiftMix((z+a)*mul+d+h) * mul
  360. return b + x
  361. }
  362. func CityHash64(s []byte, length uint32) uint64 {
  363. if length <= 32 {
  364. if length <= 16 {
  365. return hashLen0to16(s, length)
  366. } else {
  367. return hashLen17to32(s, length)
  368. }
  369. } else if length <= 64 {
  370. return hashLen33to64(s, length)
  371. }
  372. var x uint64 = fetch64(s[length-40:])
  373. var y uint64 = fetch64(s[length-16:]) + fetch64(s[length-56:])
  374. var z uint64 = hashLen16(fetch64(s[length-48:])+uint64(length), fetch64(s[length-24:]))
  375. var v Uint128 = weakHashLen32WithSeeds_3(s[length-64:], uint64(length), z)
  376. var w Uint128 = weakHashLen32WithSeeds_3(s[length-32:], y+k1, x)
  377. x = x*k1 + fetch64(s)
  378. length = (length - 1) & ^uint32(63)
  379. for {
  380. x = rotate64(x+y+v.Lower64()+fetch64(s[8:]), 37) * k1
  381. y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
  382. x ^= w.Higher64()
  383. y += v.Lower64() + fetch64(s[40:])
  384. z = rotate64(z+w.Lower64(), 33) * k1
  385. v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
  386. w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y+fetch64(s[16:]))
  387. swap64(&z, &x)
  388. s = s[64:]
  389. length -= 64
  390. if length == 0 {
  391. break
  392. }
  393. }
  394. return hashLen16(hashLen16(v.Lower64(), w.Lower64())+shiftMix(y)*k1+z, hashLen16(v.Higher64(), w.Higher64())+x)
  395. }
  396. func CityHash64WithSeed(s []byte, length uint32, seed uint64) uint64 {
  397. return CityHash64WithSeeds(s, length, k2, seed)
  398. }
  399. func CityHash64WithSeeds(s []byte, length uint32, seed0, seed1 uint64) uint64 {
  400. return hashLen16(CityHash64(s, length)-seed0, seed1)
  401. }
  402. func cityMurmur(s []byte, length uint32, seed Uint128) Uint128 {
  403. var a uint64 = seed.Lower64()
  404. var b uint64 = seed.Higher64()
  405. var c uint64 = 0
  406. var d uint64 = 0
  407. var l int32 = int32(length) - 16
  408. if l <= 0 { // len <= 16
  409. a = shiftMix(a*k1) * k1
  410. c = b*k1 + hashLen0to16(s, length)
  411. if length >= 8 {
  412. d = shiftMix(a + fetch64(s))
  413. } else {
  414. d = shiftMix(a + c)
  415. }
  416. } else { // len > 16
  417. c = hashLen16(fetch64(s[length-8:])+k1, a)
  418. d = hashLen16(b+uint64(length), c+fetch64(s[length-16:]))
  419. a += d
  420. for {
  421. a ^= shiftMix(fetch64(s)*k1) * k1
  422. a *= k1
  423. b ^= a
  424. c ^= shiftMix(fetch64(s[8:])*k1) * k1
  425. c *= k1
  426. d ^= c
  427. s = s[16:]
  428. l -= 16
  429. if l <= 0 {
  430. break
  431. }
  432. }
  433. }
  434. a = hashLen16(a, c)
  435. b = hashLen16(d, b)
  436. return Uint128{a ^ b, hashLen16(b, a)}
  437. }
  438. func CityHash128WithSeed(s []byte, length uint32, seed Uint128) Uint128 {
  439. if length < 128 {
  440. return cityMurmur(s, length, seed)
  441. }
  442. orig_length := length
  443. var t []byte = s
  444. // We expect length >= 128 to be the common case. Keep 56 bytes of state:
  445. // v, w, x, y, and z.
  446. var v, w Uint128
  447. var x uint64 = seed.Lower64()
  448. var y uint64 = seed.Higher64()
  449. var z uint64 = uint64(length) * k1
  450. v.setLower64(rotate64(y^k1, 49)*k1 + fetch64(s))
  451. v.setHigher64(rotate64(v.Lower64(), 42)*k1 + fetch64(s[8:]))
  452. w.setLower64(rotate64(y+z, 35)*k1 + x)
  453. w.setHigher64(rotate64(x+fetch64(s[88:]), 53) * k1)
  454. // This is the same inner loop as CityHash64(), manually unrolled.
  455. for {
  456. x = rotate64(x+y+v.Lower64()+fetch64(s[8:]), 37) * k1
  457. y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
  458. x ^= w.Higher64()
  459. y += v.Lower64() + fetch64(s[40:])
  460. z = rotate64(z+w.Lower64(), 33) * k1
  461. v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
  462. w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y+fetch64(s[16:]))
  463. swap64(&z, &x)
  464. s = s[64:]
  465. x = rotate64(x+y+v.Lower64()+fetch64(s[8:]), 37) * k1
  466. y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
  467. x ^= w.Higher64()
  468. y += v.Lower64() + fetch64(s[40:])
  469. z = rotate64(z+w.Lower64(), 33) * k1
  470. v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
  471. w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y+fetch64(s[16:]))
  472. swap64(&z, &x)
  473. s = s[64:]
  474. length -= 128
  475. if length < 128 {
  476. break
  477. }
  478. }
  479. x += rotate64(v.Lower64()+z, 49) * k0
  480. y = y*k0 + rotate64(w.Higher64(), 37)
  481. z = z*k0 + rotate64(w.Lower64(), 27)
  482. w.setLower64(w.Lower64() * 9)
  483. v.setLower64(v.Lower64() * k0)
  484. // If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  485. var tail_done uint32
  486. for tail_done = 0; tail_done < length; {
  487. tail_done += 32
  488. y = rotate64(x+y, 42)*k0 + v.Higher64()
  489. w.setLower64(w.Lower64() + fetch64(t[orig_length-tail_done+16:]))
  490. x = x*k0 + w.Lower64()
  491. z += w.Higher64() + fetch64(t[orig_length-tail_done:])
  492. w.setHigher64(w.Higher64() + v.Lower64())
  493. v = weakHashLen32WithSeeds_3(t[orig_length-tail_done:], v.Lower64()+z, v.Higher64())
  494. v.setLower64(v.Lower64() * k0)
  495. }
  496. // At this point our 56 bytes of state should contain more than
  497. // enough information for a strong 128-bit hash. We use two
  498. // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  499. x = hashLen16(x, v.Lower64())
  500. y = hashLen16(y+z, w.Lower64())
  501. return Uint128{hashLen16(x+v.Higher64(), w.Higher64()) + y,
  502. hashLen16(x+w.Higher64(), y+v.Higher64())}
  503. }
  504. func CityHash128(s []byte, length uint32) (result Uint128) {
  505. if length >= 16 {
  506. result = CityHash128WithSeed(s[16:length], length-16, Uint128{fetch64(s), fetch64(s[8:length]) + k0})
  507. } else {
  508. result = CityHash128WithSeed(s, length, Uint128{k0, k1})
  509. }
  510. return
  511. }