bulk.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. // Copyright 2012-present Oliver Eilhard. All rights reserved.
  2. // Use of this source code is governed by a MIT-license.
  3. // See http://olivere.mit-license.org/license.txt for details.
  4. package elastic
  5. import (
  6. "bytes"
  7. "context"
  8. "errors"
  9. "fmt"
  10. "net/url"
  11. "gopkg.in/olivere/elastic.v5/uritemplates"
  12. )
// BulkService allows for batching bulk requests and sending them to
// Elasticsearch in one roundtrip. Use the Add method with BulkIndexRequest,
// BulkUpdateRequest, and BulkDeleteRequest to add bulk requests to a batch,
// then use Do to send them to Elasticsearch.
//
// BulkService is reset after each successful Do call. In other words, you
// can reuse BulkService to send many batches. You do not have to create a
// new BulkService for each batch.
//
// See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/docs-bulk.html
// for more details.
type BulkService struct {
	// client performs the HTTP request and decodes the response in Do.
	client *Client

	// retrier is handed to the client with every request issued by Do.
	retrier Retrier

	// index and typ, when non-empty, become path segments of the bulk URL.
	index string
	typ   string

	// requests is the queue of bulk actions waiting to be sent.
	requests []BulkableRequest

	// URL query parameters; each one is only sent when it is non-empty
	// (or, for pretty, true).
	pipeline            string
	timeout             string
	refresh             string
	routing             string
	waitForActiveShards string
	pretty              bool

	// sizeInBytes is the estimated bulk size in bytes of all requests up
	// to, but not including, the request at index sizeInBytesCursor.
	sizeInBytes       int64
	sizeInBytesCursor int
}
  40. // NewBulkService initializes a new BulkService.
  41. func NewBulkService(client *Client) *BulkService {
  42. builder := &BulkService{
  43. client: client,
  44. }
  45. return builder
  46. }
  47. // Reset cleans up the request queue
  48. func (s *BulkService) Reset() {
  49. s.requests = make([]BulkableRequest, 0)
  50. s.sizeInBytes = 0
  51. s.sizeInBytesCursor = 0
  52. }
  53. // Retrier allows to set specific retry logic for this BulkService.
  54. // If not specified, it will use the client's default retrier.
  55. func (s *BulkService) Retrier(retrier Retrier) *BulkService {
  56. s.retrier = retrier
  57. return s
  58. }
  59. // Index specifies the index to use for all batches. You may also leave
  60. // this blank and specify the index in the individual bulk requests.
  61. func (s *BulkService) Index(index string) *BulkService {
  62. s.index = index
  63. return s
  64. }
  65. // Type specifies the type to use for all batches. You may also leave
  66. // this blank and specify the type in the individual bulk requests.
  67. func (s *BulkService) Type(typ string) *BulkService {
  68. s.typ = typ
  69. return s
  70. }
  71. // Timeout is a global timeout for processing bulk requests. This is a
  72. // server-side timeout, i.e. it tells Elasticsearch the time after which
  73. // it should stop processing.
  74. func (s *BulkService) Timeout(timeout string) *BulkService {
  75. s.timeout = timeout
  76. return s
  77. }
  78. // Refresh controls when changes made by this request are made visible
  79. // to search. The allowed values are: "true" (refresh the relevant
  80. // primary and replica shards immediately), "wait_for" (wait for the
  81. // changes to be made visible by a refresh before applying), or "false"
  82. // (no refresh related actions).
  83. func (s *BulkService) Refresh(refresh string) *BulkService {
  84. s.refresh = refresh
  85. return s
  86. }
  87. // Routing specifies the routing value.
  88. func (s *BulkService) Routing(routing string) *BulkService {
  89. s.routing = routing
  90. return s
  91. }
  92. // Pipeline specifies the pipeline id to preprocess incoming documents with.
  93. func (s *BulkService) Pipeline(pipeline string) *BulkService {
  94. s.pipeline = pipeline
  95. return s
  96. }
  97. // WaitForActiveShards sets the number of shard copies that must be active
  98. // before proceeding with the bulk operation. Defaults to 1, meaning the
  99. // primary shard only. Set to `all` for all shard copies, otherwise set to
  100. // any non-negative value less than or equal to the total number of copies
  101. // for the shard (number of replicas + 1).
  102. func (s *BulkService) WaitForActiveShards(waitForActiveShards string) *BulkService {
  103. s.waitForActiveShards = waitForActiveShards
  104. return s
  105. }
  106. // Pretty tells Elasticsearch whether to return a formatted JSON response.
  107. func (s *BulkService) Pretty(pretty bool) *BulkService {
  108. s.pretty = pretty
  109. return s
  110. }
  111. // Add adds bulkable requests, i.e. BulkIndexRequest, BulkUpdateRequest,
  112. // and/or BulkDeleteRequest.
  113. func (s *BulkService) Add(requests ...BulkableRequest) *BulkService {
  114. for _, r := range requests {
  115. s.requests = append(s.requests, r)
  116. }
  117. return s
  118. }
  119. // EstimatedSizeInBytes returns the estimated size of all bulkable
  120. // requests added via Add.
  121. func (s *BulkService) EstimatedSizeInBytes() int64 {
  122. if s.sizeInBytesCursor == len(s.requests) {
  123. return s.sizeInBytes
  124. }
  125. for _, r := range s.requests[s.sizeInBytesCursor:] {
  126. s.sizeInBytes += s.estimateSizeInBytes(r)
  127. s.sizeInBytesCursor++
  128. }
  129. return s.sizeInBytes
  130. }
  131. // estimateSizeInBytes returns the estimates size of the given
  132. // bulkable request, i.e. BulkIndexRequest, BulkUpdateRequest, and
  133. // BulkDeleteRequest.
  134. func (s *BulkService) estimateSizeInBytes(r BulkableRequest) int64 {
  135. lines, _ := r.Source()
  136. size := 0
  137. for _, line := range lines {
  138. // +1 for the \n
  139. size += len(line) + 1
  140. }
  141. return int64(size)
  142. }
  143. // NumberOfActions returns the number of bulkable requests that need to
  144. // be sent to Elasticsearch on the next batch.
  145. func (s *BulkService) NumberOfActions() int {
  146. return len(s.requests)
  147. }
  148. func (s *BulkService) bodyAsString() (string, error) {
  149. // Pre-allocate to reduce allocs
  150. buf := bytes.NewBuffer(make([]byte, 0, s.EstimatedSizeInBytes()))
  151. for _, req := range s.requests {
  152. source, err := req.Source()
  153. if err != nil {
  154. return "", err
  155. }
  156. for _, line := range source {
  157. buf.WriteString(line)
  158. buf.WriteByte('\n')
  159. }
  160. }
  161. return buf.String(), nil
  162. }
  163. // Do sends the batched requests to Elasticsearch. Note that, when successful,
  164. // you can reuse the BulkService for the next batch as the list of bulk
  165. // requests is cleared on success.
  166. func (s *BulkService) Do(ctx context.Context) (*BulkResponse, error) {
  167. // No actions?
  168. if s.NumberOfActions() == 0 {
  169. return nil, errors.New("elastic: No bulk actions to commit")
  170. }
  171. // Get body
  172. body, err := s.bodyAsString()
  173. if err != nil {
  174. return nil, err
  175. }
  176. // Build url
  177. path := "/"
  178. if len(s.index) > 0 {
  179. index, err := uritemplates.Expand("{index}", map[string]string{
  180. "index": s.index,
  181. })
  182. if err != nil {
  183. return nil, err
  184. }
  185. path += index + "/"
  186. }
  187. if len(s.typ) > 0 {
  188. typ, err := uritemplates.Expand("{type}", map[string]string{
  189. "type": s.typ,
  190. })
  191. if err != nil {
  192. return nil, err
  193. }
  194. path += typ + "/"
  195. }
  196. path += "_bulk"
  197. // Parameters
  198. params := make(url.Values)
  199. if s.pretty {
  200. params.Set("pretty", fmt.Sprintf("%v", s.pretty))
  201. }
  202. if s.pipeline != "" {
  203. params.Set("pipeline", s.pipeline)
  204. }
  205. if s.refresh != "" {
  206. params.Set("refresh", s.refresh)
  207. }
  208. if s.routing != "" {
  209. params.Set("routing", s.routing)
  210. }
  211. if s.timeout != "" {
  212. params.Set("timeout", s.timeout)
  213. }
  214. if s.waitForActiveShards != "" {
  215. params.Set("wait_for_active_shards", s.waitForActiveShards)
  216. }
  217. // Get response
  218. res, err := s.client.PerformRequestWithOptions(ctx, PerformRequestOptions{
  219. Method: "POST",
  220. Path: path,
  221. Params: params,
  222. Body: body,
  223. ContentType: "application/x-ndjson",
  224. Retrier: s.retrier,
  225. })
  226. if err != nil {
  227. return nil, err
  228. }
  229. // Return results
  230. ret := new(BulkResponse)
  231. if err := s.client.decoder.Decode(res.Body, ret); err != nil {
  232. return nil, err
  233. }
  234. // Reset so the request can be reused
  235. s.Reset()
  236. return ret, nil
  237. }
// BulkResponse is a response to a bulk execution.
//
// Items holds one entry per bulk action. Each entry is a map keyed by the
// action name (e.g. "index", "delete", "update") whose value is the result
// of that action.
//
// Example:
//
//	{
//	  "took":3,
//	  "errors":false,
//	  "items":[{
//	    "index":{
//	      "_index":"index1",
//	      "_type":"tweet",
//	      "_id":"1",
//	      "_version":3,
//	      "status":201
//	    }
//	  },{
//	    "index":{
//	      "_index":"index2",
//	      "_type":"tweet",
//	      "_id":"2",
//	      "_version":3,
//	      "status":200
//	    }
//	  },{
//	    "delete":{
//	      "_index":"index1",
//	      "_type":"tweet",
//	      "_id":"1",
//	      "_version":4,
//	      "status":200,
//	      "found":true
//	    }
//	  },{
//	    "update":{
//	      "_index":"index2",
//	      "_type":"tweet",
//	      "_id":"2",
//	      "_version":4,
//	      "status":200
//	    }
//	  }]
//	}
type BulkResponse struct {
	Took   int                            `json:"took,omitempty"`
	Errors bool                           `json:"errors,omitempty"`
	Items  []map[string]*BulkResponseItem `json:"items,omitempty"`
}
// BulkResponseItem is the result of a single bulk request.
//
// Status is the per-item status code; Failed and Succeeded treat values
// from 200 through 299 as success. Error carries the error details of the
// item, if any were returned.
type BulkResponseItem struct {
	Index         string        `json:"_index,omitempty"`
	Type          string        `json:"_type,omitempty"`
	Id            string        `json:"_id,omitempty"`
	Version       int64         `json:"_version,omitempty"`
	Status        int           `json:"status,omitempty"`
	Result        string        `json:"result,omitempty"`
	ForcedRefresh bool          `json:"forced_refresh,omitempty"`
	Found         bool          `json:"found,omitempty"`
	Error         *ErrorDetails `json:"error,omitempty"`
	GetResult     *GetResult    `json:"get,omitempty"`
}
  297. // Indexed returns all bulk request results of "index" actions.
  298. func (r *BulkResponse) Indexed() []*BulkResponseItem {
  299. return r.ByAction("index")
  300. }
  301. // Created returns all bulk request results of "create" actions.
  302. func (r *BulkResponse) Created() []*BulkResponseItem {
  303. return r.ByAction("create")
  304. }
  305. // Updated returns all bulk request results of "update" actions.
  306. func (r *BulkResponse) Updated() []*BulkResponseItem {
  307. return r.ByAction("update")
  308. }
  309. // Deleted returns all bulk request results of "delete" actions.
  310. func (r *BulkResponse) Deleted() []*BulkResponseItem {
  311. return r.ByAction("delete")
  312. }
  313. // ByAction returns all bulk request results of a certain action,
  314. // e.g. "index" or "delete".
  315. func (r *BulkResponse) ByAction(action string) []*BulkResponseItem {
  316. if r.Items == nil {
  317. return nil
  318. }
  319. var items []*BulkResponseItem
  320. for _, item := range r.Items {
  321. if result, found := item[action]; found {
  322. items = append(items, result)
  323. }
  324. }
  325. return items
  326. }
  327. // ById returns all bulk request results of a given document id,
  328. // regardless of the action ("index", "delete" etc.).
  329. func (r *BulkResponse) ById(id string) []*BulkResponseItem {
  330. if r.Items == nil {
  331. return nil
  332. }
  333. var items []*BulkResponseItem
  334. for _, item := range r.Items {
  335. for _, result := range item {
  336. if result.Id == id {
  337. items = append(items, result)
  338. }
  339. }
  340. }
  341. return items
  342. }
  343. // Failed returns those items of a bulk response that have errors,
  344. // i.e. those that don't have a status code between 200 and 299.
  345. func (r *BulkResponse) Failed() []*BulkResponseItem {
  346. if r.Items == nil {
  347. return nil
  348. }
  349. var errors []*BulkResponseItem
  350. for _, item := range r.Items {
  351. for _, result := range item {
  352. if !(result.Status >= 200 && result.Status <= 299) {
  353. errors = append(errors, result)
  354. }
  355. }
  356. }
  357. return errors
  358. }
  359. // Succeeded returns those items of a bulk response that have no errors,
  360. // i.e. those have a status code between 200 and 299.
  361. func (r *BulkResponse) Succeeded() []*BulkResponseItem {
  362. if r.Items == nil {
  363. return nil
  364. }
  365. var succeeded []*BulkResponseItem
  366. for _, item := range r.Items {
  367. for _, result := range item {
  368. if result.Status >= 200 && result.Status <= 299 {
  369. succeeded = append(succeeded, result)
  370. }
  371. }
  372. }
  373. return succeeded
  374. }