scroll.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. // Copyright 2012-present Oliver Eilhard. All rights reserved.
  2. // Use of this source code is governed by a MIT-license.
  3. // See http://olivere.mit-license.org/license.txt for details.
  4. package elastic
  5. import (
  6. "context"
  7. "fmt"
  8. "io"
  9. "net/url"
  10. "strings"
  11. "sync"
  12. "gopkg.in/olivere/elastic.v5/uritemplates"
  13. )
  14. const (
  15. // DefaultScrollKeepAlive is the default time a scroll cursor will be kept alive.
  16. DefaultScrollKeepAlive = "5m"
  17. )
  18. // ScrollService iterates over pages of search results from Elasticsearch.
  19. type ScrollService struct {
  20. client *Client
  21. retrier Retrier
  22. indices []string
  23. types []string
  24. keepAlive string
  25. body interface{}
  26. ss *SearchSource
  27. size *int
  28. pretty bool
  29. routing string
  30. preference string
  31. ignoreUnavailable *bool
  32. allowNoIndices *bool
  33. expandWildcards string
  34. mu sync.RWMutex
  35. scrollId string
  36. }
  37. // NewScrollService initializes and returns a new ScrollService.
  38. func NewScrollService(client *Client) *ScrollService {
  39. builder := &ScrollService{
  40. client: client,
  41. ss: NewSearchSource(),
  42. keepAlive: DefaultScrollKeepAlive,
  43. }
  44. return builder
  45. }
  46. // Retrier allows to set specific retry logic for this ScrollService.
  47. // If not specified, it will use the client's default retrier.
  48. func (s *ScrollService) Retrier(retrier Retrier) *ScrollService {
  49. s.retrier = retrier
  50. return s
  51. }
  52. // Index sets the name of one or more indices to iterate over.
  53. func (s *ScrollService) Index(indices ...string) *ScrollService {
  54. if s.indices == nil {
  55. s.indices = make([]string, 0)
  56. }
  57. s.indices = append(s.indices, indices...)
  58. return s
  59. }
  60. // Type sets the name of one or more types to iterate over.
  61. func (s *ScrollService) Type(types ...string) *ScrollService {
  62. if s.types == nil {
  63. s.types = make([]string, 0)
  64. }
  65. s.types = append(s.types, types...)
  66. return s
  67. }
  68. // Scroll is an alias for KeepAlive, the time to keep
  69. // the cursor alive (e.g. "5m" for 5 minutes).
  70. func (s *ScrollService) Scroll(keepAlive string) *ScrollService {
  71. s.keepAlive = keepAlive
  72. return s
  73. }
  74. // KeepAlive sets the maximum time after which the cursor will expire.
  75. // It is "2m" by default.
  76. func (s *ScrollService) KeepAlive(keepAlive string) *ScrollService {
  77. s.keepAlive = keepAlive
  78. return s
  79. }
  80. // Size specifies the number of documents Elasticsearch should return
  81. // from each shard, per page.
  82. func (s *ScrollService) Size(size int) *ScrollService {
  83. s.size = &size
  84. return s
  85. }
  86. // Body sets the raw body to send to Elasticsearch. This can be e.g. a string,
  87. // a map[string]interface{} or anything that can be serialized into JSON.
  88. // Notice that setting the body disables the use of SearchSource and many
  89. // other properties of the SearchService.
  90. func (s *ScrollService) Body(body interface{}) *ScrollService {
  91. s.body = body
  92. return s
  93. }
  94. // SearchSource sets the search source builder to use with this iterator.
  95. // Notice that only a certain number of properties can be used when scrolling,
  96. // e.g. query and sorting.
  97. func (s *ScrollService) SearchSource(searchSource *SearchSource) *ScrollService {
  98. s.ss = searchSource
  99. if s.ss == nil {
  100. s.ss = NewSearchSource()
  101. }
  102. return s
  103. }
  104. // Query sets the query to perform, e.g. a MatchAllQuery.
  105. func (s *ScrollService) Query(query Query) *ScrollService {
  106. s.ss = s.ss.Query(query)
  107. return s
  108. }
  109. // PostFilter is executed as the last filter. It only affects the
  110. // search hits but not facets. See
  111. // https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-request-post-filter.html
  112. // for details.
  113. func (s *ScrollService) PostFilter(postFilter Query) *ScrollService {
  114. s.ss = s.ss.PostFilter(postFilter)
  115. return s
  116. }
  117. // Slice allows slicing the scroll request into several batches.
  118. // This is supported in Elasticsearch 5.0 or later.
  119. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-request-scroll.html#sliced-scroll
  120. // for details.
  121. func (s *ScrollService) Slice(sliceQuery Query) *ScrollService {
  122. s.ss = s.ss.Slice(sliceQuery)
  123. return s
  124. }
  125. // FetchSource indicates whether the response should contain the stored
  126. // _source for every hit.
  127. func (s *ScrollService) FetchSource(fetchSource bool) *ScrollService {
  128. s.ss = s.ss.FetchSource(fetchSource)
  129. return s
  130. }
  131. // FetchSourceContext indicates how the _source should be fetched.
  132. func (s *ScrollService) FetchSourceContext(fetchSourceContext *FetchSourceContext) *ScrollService {
  133. s.ss = s.ss.FetchSourceContext(fetchSourceContext)
  134. return s
  135. }
  136. // Version can be set to true to return a version for each search hit.
  137. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-request-version.html.
  138. func (s *ScrollService) Version(version bool) *ScrollService {
  139. s.ss = s.ss.Version(version)
  140. return s
  141. }
  142. // Sort adds a sort order. This can have negative effects on the performance
  143. // of the scroll operation as Elasticsearch needs to sort first.
  144. func (s *ScrollService) Sort(field string, ascending bool) *ScrollService {
  145. s.ss = s.ss.Sort(field, ascending)
  146. return s
  147. }
  148. // SortWithInfo specifies a sort order. Notice that sorting can have a
  149. // negative impact on scroll performance.
  150. func (s *ScrollService) SortWithInfo(info SortInfo) *ScrollService {
  151. s.ss = s.ss.SortWithInfo(info)
  152. return s
  153. }
  154. // SortBy specifies a sort order. Notice that sorting can have a
  155. // negative impact on scroll performance.
  156. func (s *ScrollService) SortBy(sorter ...Sorter) *ScrollService {
  157. s.ss = s.ss.SortBy(sorter...)
  158. return s
  159. }
  160. // Pretty asks Elasticsearch to pretty-print the returned JSON.
  161. func (s *ScrollService) Pretty(pretty bool) *ScrollService {
  162. s.pretty = pretty
  163. return s
  164. }
  165. // Routing is a list of specific routing values to control the shards
  166. // the search will be executed on.
  167. func (s *ScrollService) Routing(routings ...string) *ScrollService {
  168. s.routing = strings.Join(routings, ",")
  169. return s
  170. }
  171. // Preference sets the preference to execute the search. Defaults to
  172. // randomize across shards ("random"). Can be set to "_local" to prefer
  173. // local shards, "_primary" to execute on primary shards only,
  174. // or a custom value which guarantees that the same order will be used
  175. // across different requests.
  176. func (s *ScrollService) Preference(preference string) *ScrollService {
  177. s.preference = preference
  178. return s
  179. }
  180. // IgnoreUnavailable indicates whether the specified concrete indices
  181. // should be ignored when unavailable (missing or closed).
  182. func (s *ScrollService) IgnoreUnavailable(ignoreUnavailable bool) *ScrollService {
  183. s.ignoreUnavailable = &ignoreUnavailable
  184. return s
  185. }
  186. // AllowNoIndices indicates whether to ignore if a wildcard indices
  187. // expression resolves into no concrete indices. (This includes `_all` string
  188. // or when no indices have been specified).
  189. func (s *ScrollService) AllowNoIndices(allowNoIndices bool) *ScrollService {
  190. s.allowNoIndices = &allowNoIndices
  191. return s
  192. }
  193. // ExpandWildcards indicates whether to expand wildcard expression to
  194. // concrete indices that are open, closed or both.
  195. func (s *ScrollService) ExpandWildcards(expandWildcards string) *ScrollService {
  196. s.expandWildcards = expandWildcards
  197. return s
  198. }
  199. // ScrollId specifies the identifier of a scroll in action.
  200. func (s *ScrollService) ScrollId(scrollId string) *ScrollService {
  201. s.mu.Lock()
  202. s.scrollId = scrollId
  203. s.mu.Unlock()
  204. return s
  205. }
  206. // Do returns the next search result. It will return io.EOF as error if there
  207. // are no more search results.
  208. func (s *ScrollService) Do(ctx context.Context) (*SearchResult, error) {
  209. s.mu.RLock()
  210. nextScrollId := s.scrollId
  211. s.mu.RUnlock()
  212. if len(nextScrollId) == 0 {
  213. return s.first(ctx)
  214. }
  215. return s.next(ctx)
  216. }
  217. // Clear cancels the current scroll operation. If you don't do this manually,
  218. // the scroll will be expired automatically by Elasticsearch. You can control
  219. // how long a scroll cursor is kept alive with the KeepAlive func.
  220. func (s *ScrollService) Clear(ctx context.Context) error {
  221. s.mu.RLock()
  222. scrollId := s.scrollId
  223. s.mu.RUnlock()
  224. if len(scrollId) == 0 {
  225. return nil
  226. }
  227. path := "/_search/scroll"
  228. params := url.Values{}
  229. body := struct {
  230. ScrollId []string `json:"scroll_id,omitempty"`
  231. }{
  232. ScrollId: []string{scrollId},
  233. }
  234. _, err := s.client.PerformRequestWithOptions(ctx, PerformRequestOptions{
  235. Method: "DELETE",
  236. Path: path,
  237. Params: params,
  238. Body: body,
  239. Retrier: s.retrier,
  240. })
  241. if err != nil {
  242. return err
  243. }
  244. return nil
  245. }
  246. // -- First --
  247. // first takes the first page of search results.
  248. func (s *ScrollService) first(ctx context.Context) (*SearchResult, error) {
  249. // Get URL and parameters for request
  250. path, params, err := s.buildFirstURL()
  251. if err != nil {
  252. return nil, err
  253. }
  254. // Get HTTP request body
  255. body, err := s.bodyFirst()
  256. if err != nil {
  257. return nil, err
  258. }
  259. // Get HTTP response
  260. res, err := s.client.PerformRequestWithOptions(ctx, PerformRequestOptions{
  261. Method: "POST",
  262. Path: path,
  263. Params: params,
  264. Body: body,
  265. Retrier: s.retrier,
  266. })
  267. if err != nil {
  268. return nil, err
  269. }
  270. // Return operation response
  271. ret := new(SearchResult)
  272. if err := s.client.decoder.Decode(res.Body, ret); err != nil {
  273. return nil, err
  274. }
  275. s.mu.Lock()
  276. s.scrollId = ret.ScrollId
  277. s.mu.Unlock()
  278. if ret.Hits == nil || len(ret.Hits.Hits) == 0 {
  279. return nil, io.EOF
  280. }
  281. return ret, nil
  282. }
  283. // buildFirstURL builds the URL for retrieving the first page.
  284. func (s *ScrollService) buildFirstURL() (string, url.Values, error) {
  285. // Build URL
  286. var err error
  287. var path string
  288. if len(s.indices) == 0 && len(s.types) == 0 {
  289. path = "/_search"
  290. } else if len(s.indices) > 0 && len(s.types) == 0 {
  291. path, err = uritemplates.Expand("/{index}/_search", map[string]string{
  292. "index": strings.Join(s.indices, ","),
  293. })
  294. } else if len(s.indices) == 0 && len(s.types) > 0 {
  295. path, err = uritemplates.Expand("/_all/{typ}/_search", map[string]string{
  296. "typ": strings.Join(s.types, ","),
  297. })
  298. } else {
  299. path, err = uritemplates.Expand("/{index}/{typ}/_search", map[string]string{
  300. "index": strings.Join(s.indices, ","),
  301. "typ": strings.Join(s.types, ","),
  302. })
  303. }
  304. if err != nil {
  305. return "", url.Values{}, err
  306. }
  307. // Add query string parameters
  308. params := url.Values{}
  309. if s.pretty {
  310. params.Set("pretty", "1")
  311. }
  312. if s.size != nil && *s.size > 0 {
  313. params.Set("size", fmt.Sprintf("%d", *s.size))
  314. }
  315. if len(s.keepAlive) > 0 {
  316. params.Set("scroll", s.keepAlive)
  317. }
  318. if len(s.routing) > 0 {
  319. params.Set("routing", s.routing)
  320. }
  321. if len(s.preference) > 0 {
  322. params.Set("preference", s.preference)
  323. }
  324. if s.allowNoIndices != nil {
  325. params.Set("allow_no_indices", fmt.Sprintf("%v", *s.allowNoIndices))
  326. }
  327. if len(s.expandWildcards) > 0 {
  328. params.Set("expand_wildcards", s.expandWildcards)
  329. }
  330. if s.ignoreUnavailable != nil {
  331. params.Set("ignore_unavailable", fmt.Sprintf("%v", *s.ignoreUnavailable))
  332. }
  333. return path, params, nil
  334. }
  335. // bodyFirst returns the request to fetch the first batch of results.
  336. func (s *ScrollService) bodyFirst() (interface{}, error) {
  337. var err error
  338. var body interface{}
  339. if s.body != nil {
  340. body = s.body
  341. } else {
  342. // Use _doc sort by default if none is specified
  343. if !s.ss.hasSort() {
  344. // Use efficient sorting when no user-defined query/body is specified
  345. s.ss = s.ss.SortBy(SortByDoc{})
  346. }
  347. // Body from search source
  348. body, err = s.ss.Source()
  349. if err != nil {
  350. return nil, err
  351. }
  352. }
  353. return body, nil
  354. }
  355. // -- Next --
  356. func (s *ScrollService) next(ctx context.Context) (*SearchResult, error) {
  357. // Get URL for request
  358. path, params, err := s.buildNextURL()
  359. if err != nil {
  360. return nil, err
  361. }
  362. // Setup HTTP request body
  363. body, err := s.bodyNext()
  364. if err != nil {
  365. return nil, err
  366. }
  367. // Get HTTP response
  368. res, err := s.client.PerformRequestWithOptions(ctx, PerformRequestOptions{
  369. Method: "POST",
  370. Path: path,
  371. Params: params,
  372. Body: body,
  373. Retrier: s.retrier,
  374. })
  375. if err != nil {
  376. return nil, err
  377. }
  378. // Return operation response
  379. ret := new(SearchResult)
  380. if err := s.client.decoder.Decode(res.Body, ret); err != nil {
  381. return nil, err
  382. }
  383. s.mu.Lock()
  384. s.scrollId = ret.ScrollId
  385. s.mu.Unlock()
  386. if ret.Hits == nil || len(ret.Hits.Hits) == 0 {
  387. return nil, io.EOF
  388. }
  389. return ret, nil
  390. }
  391. // buildNextURL builds the URL for the operation.
  392. func (s *ScrollService) buildNextURL() (string, url.Values, error) {
  393. path := "/_search/scroll"
  394. // Add query string parameters
  395. params := url.Values{}
  396. if s.pretty {
  397. params.Set("pretty", "1")
  398. }
  399. return path, params, nil
  400. }
  401. // body returns the request to fetch the next batch of results.
  402. func (s *ScrollService) bodyNext() (interface{}, error) {
  403. s.mu.RLock()
  404. body := struct {
  405. Scroll string `json:"scroll"`
  406. ScrollId string `json:"scroll_id,omitempty"`
  407. }{
  408. Scroll: s.keepAlive,
  409. ScrollId: s.scrollId,
  410. }
  411. s.mu.RUnlock()
  412. return body, nil
  413. }