riot.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. // Copyright 2017 ego authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License"): you may
  4. // not use this file except in compliance with the License. You may obtain
  5. // a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  11. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  12. // License for the specific language governing permissions and limitations
  13. // under the License.
  14. package riot
  15. import (
  16. "bytes"
  17. "fmt"
  18. "log"
  19. "os"
  20. "strings"
  21. "encoding/binary"
  22. "encoding/gob"
  23. "github.com/go-ego/murmur"
  24. "github.com/go-ego/riot/core"
  25. "github.com/go-ego/riot/types"
  26. toml "github.com/go-vgo/gt/conf"
  27. )
  28. // New create a new engine with mode
  29. func New(conf ...interface{}) *Engine {
  30. // func (engine *Engine) New(conf com.Config) *Engine{
  31. if len(conf) > 0 && strings.HasSuffix(conf[0].(string), ".toml") {
  32. var (
  33. config types.EngineOpts
  34. searcher = &Engine{}
  35. )
  36. fs := conf[0].(string)
  37. log.Println("conf path is: ", fs)
  38. toml.Init(fs, &config)
  39. go toml.Watch(fs, &config)
  40. searcher.Init(config)
  41. return searcher
  42. }
  43. return NewEngine(conf...)
  44. }
  45. // NewEngine create a new engine
  46. func NewEngine(conf ...interface{}) *Engine {
  47. var (
  48. searcher = &Engine{}
  49. path = DefaultPath
  50. storageShards = 10
  51. numShards = 10
  52. segmentDict string
  53. )
  54. if len(conf) > 0 {
  55. segmentDict = conf[0].(string)
  56. }
  57. if len(conf) > 1 {
  58. path = conf[1].(string)
  59. }
  60. if len(conf) > 2 {
  61. numShards = conf[2].(int)
  62. storageShards = conf[2].(int)
  63. }
  64. searcher.Init(types.EngineOpts{
  65. // Using: using,
  66. StoreShards: storageShards,
  67. NumShards: numShards,
  68. IndexerOpts: &types.IndexerOpts{
  69. IndexType: types.DocIdsIndex,
  70. },
  71. UseStore: true,
  72. StoreFolder: path,
  73. // StoreEngine: storageEngine,
  74. GseDict: segmentDict,
  75. // StopTokenFile: stopTokenFile,
  76. })
  77. // defer searcher.Close()
  78. os.MkdirAll(path, 0777)
  79. // 等待索引刷新完毕
  80. // searcher.Flush()
  81. // log.Println("recover index number: ", searcher.NumDocsIndexed())
  82. return searcher
  83. }
  84. // func (engine *Engine) IsDocExist(docId uint64) bool {
  85. // return core.IsDocExist(docId)
  86. // }
  87. // HasDoc if the document is exist return true
  88. func (engine *Engine) HasDoc(docId uint64) bool {
  89. for shard := 0; shard < engine.initOptions.NumShards; shard++ {
  90. engine.indexers = append(engine.indexers, core.Indexer{})
  91. has := engine.indexers[shard].HasDoc(docId)
  92. if has {
  93. return true
  94. }
  95. }
  96. return false
  97. }
  98. // HasDocDB if the document is exist in the database
  99. // return true
  100. func (engine *Engine) HasDocDB(docId uint64) bool {
  101. b := make([]byte, 10)
  102. length := binary.PutUvarint(b, docId)
  103. shard := murmur.Sum32(fmt.Sprintf("%d", docId)) %
  104. uint32(engine.initOptions.StoreShards)
  105. has, err := engine.dbs[shard].Has(b[0:length])
  106. if err != nil {
  107. log.Println("engine.dbs[shard].Has(b[0:length]): ", err)
  108. }
  109. return has
  110. }
  111. // GetDBAllIds get all the DocId from the storage database
  112. // and return
  113. // 从数据库遍历所有的 DocId, 并返回
  114. func (engine *Engine) GetDBAllIds() []uint64 {
  115. docsId := make([]uint64, 0)
  116. for i := range engine.dbs {
  117. engine.dbs[i].ForEach(func(k, v []byte) error {
  118. // fmt.Println(k, v)
  119. docId, _ := binary.Uvarint(k)
  120. docsId = append(docsId, docId)
  121. return nil
  122. })
  123. }
  124. return docsId
  125. }
  126. // GetDBAllDocs get the db all docs
  127. func (engine *Engine) GetDBAllDocs() (
  128. docsId []uint64, docsData []types.DocData) {
  129. for i := range engine.dbs {
  130. engine.dbs[i].ForEach(func(key, val []byte) error {
  131. // fmt.Println(k, v)
  132. docId, _ := binary.Uvarint(key)
  133. docsId = append(docsId, docId)
  134. buf := bytes.NewReader(val)
  135. dec := gob.NewDecoder(buf)
  136. var data types.DocData
  137. err := dec.Decode(&data)
  138. if err != nil {
  139. log.Println("dec.decode: ", err)
  140. }
  141. docsData = append(docsData, data)
  142. return nil
  143. })
  144. }
  145. return docsId, docsData
  146. }
  147. // GetAllDocIds get all the DocId from the storage database
  148. // and return
  149. // 从数据库遍历所有的 DocId, 并返回
  150. func (engine *Engine) GetAllDocIds() []uint64 {
  151. return engine.GetDBAllIds()
  152. }
  153. // Try handler(err)
  154. func Try(fun func(), handler func(interface{})) {
  155. defer func() {
  156. if err := recover(); err != nil {
  157. handler(err)
  158. }
  159. }()
  160. fun()
  161. }