termvectors.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. // Copyright 2012-present Oliver Eilhard. All rights reserved.
  2. // Use of this source code is governed by a MIT-license.
  3. // See http://olivere.mit-license.org/license.txt for details.
  4. package elastic
  5. import (
  6. "context"
  7. "fmt"
  8. "net/url"
  9. "strings"
  10. "gopkg.in/olivere/elastic.v5/uritemplates"
  11. )
  12. // TermvectorsService returns information and statistics on terms in the
  13. // fields of a particular document. The document could be stored in the
  14. // index or artificially provided by the user.
  15. //
  16. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/docs-termvectors.html
  17. // for documentation.
  18. type TermvectorsService struct {
  19. client *Client
  20. pretty bool
  21. id string
  22. index string
  23. typ string
  24. dfs *bool
  25. doc interface{}
  26. fieldStatistics *bool
  27. fields []string
  28. filter *TermvectorsFilterSettings
  29. perFieldAnalyzer map[string]string
  30. offsets *bool
  31. parent string
  32. payloads *bool
  33. positions *bool
  34. preference string
  35. realtime *bool
  36. routing string
  37. termStatistics *bool
  38. version interface{}
  39. versionType string
  40. bodyJson interface{}
  41. bodyString string
  42. }
  43. // NewTermvectorsService creates a new TermvectorsService.
  44. func NewTermvectorsService(client *Client) *TermvectorsService {
  45. return &TermvectorsService{
  46. client: client,
  47. }
  48. }
  49. // Index in which the document resides.
  50. func (s *TermvectorsService) Index(index string) *TermvectorsService {
  51. s.index = index
  52. return s
  53. }
  54. // Type of the document.
  55. func (s *TermvectorsService) Type(typ string) *TermvectorsService {
  56. s.typ = typ
  57. return s
  58. }
  59. // Id of the document.
  60. func (s *TermvectorsService) Id(id string) *TermvectorsService {
  61. s.id = id
  62. return s
  63. }
  64. // Dfs specifies if distributed frequencies should be returned instead
  65. // shard frequencies.
  66. func (s *TermvectorsService) Dfs(dfs bool) *TermvectorsService {
  67. s.dfs = &dfs
  68. return s
  69. }
  70. // Doc is the document to analyze.
  71. func (s *TermvectorsService) Doc(doc interface{}) *TermvectorsService {
  72. s.doc = doc
  73. return s
  74. }
  75. // FieldStatistics specifies if document count, sum of document frequencies
  76. // and sum of total term frequencies should be returned.
  77. func (s *TermvectorsService) FieldStatistics(fieldStatistics bool) *TermvectorsService {
  78. s.fieldStatistics = &fieldStatistics
  79. return s
  80. }
  81. // Fields a list of fields to return.
  82. func (s *TermvectorsService) Fields(fields ...string) *TermvectorsService {
  83. if s.fields == nil {
  84. s.fields = make([]string, 0)
  85. }
  86. s.fields = append(s.fields, fields...)
  87. return s
  88. }
  89. // Filter adds terms filter settings.
  90. func (s *TermvectorsService) Filter(filter *TermvectorsFilterSettings) *TermvectorsService {
  91. s.filter = filter
  92. return s
  93. }
  94. // PerFieldAnalyzer allows to specify a different analyzer than the one
  95. // at the field.
  96. func (s *TermvectorsService) PerFieldAnalyzer(perFieldAnalyzer map[string]string) *TermvectorsService {
  97. s.perFieldAnalyzer = perFieldAnalyzer
  98. return s
  99. }
  100. // Offsets specifies if term offsets should be returned.
  101. func (s *TermvectorsService) Offsets(offsets bool) *TermvectorsService {
  102. s.offsets = &offsets
  103. return s
  104. }
  105. // Parent id of documents.
  106. func (s *TermvectorsService) Parent(parent string) *TermvectorsService {
  107. s.parent = parent
  108. return s
  109. }
  110. // Payloads specifies if term payloads should be returned.
  111. func (s *TermvectorsService) Payloads(payloads bool) *TermvectorsService {
  112. s.payloads = &payloads
  113. return s
  114. }
  115. // Positions specifies if term positions should be returned.
  116. func (s *TermvectorsService) Positions(positions bool) *TermvectorsService {
  117. s.positions = &positions
  118. return s
  119. }
  120. // Preference specify the node or shard the operation
  121. // should be performed on (default: random).
  122. func (s *TermvectorsService) Preference(preference string) *TermvectorsService {
  123. s.preference = preference
  124. return s
  125. }
  126. // Realtime specifies if request is real-time as opposed to
  127. // near-real-time (default: true).
  128. func (s *TermvectorsService) Realtime(realtime bool) *TermvectorsService {
  129. s.realtime = &realtime
  130. return s
  131. }
  132. // Routing is a specific routing value.
  133. func (s *TermvectorsService) Routing(routing string) *TermvectorsService {
  134. s.routing = routing
  135. return s
  136. }
  137. // TermStatistics specifies if total term frequency and document frequency
  138. // should be returned.
  139. func (s *TermvectorsService) TermStatistics(termStatistics bool) *TermvectorsService {
  140. s.termStatistics = &termStatistics
  141. return s
  142. }
  143. // Version an explicit version number for concurrency control.
  144. func (s *TermvectorsService) Version(version interface{}) *TermvectorsService {
  145. s.version = version
  146. return s
  147. }
  148. // VersionType specifies a version type ("internal", "external", "external_gte", or "force").
  149. func (s *TermvectorsService) VersionType(versionType string) *TermvectorsService {
  150. s.versionType = versionType
  151. return s
  152. }
  153. // Pretty indicates that the JSON response be indented and human readable.
  154. func (s *TermvectorsService) Pretty(pretty bool) *TermvectorsService {
  155. s.pretty = pretty
  156. return s
  157. }
  158. // BodyJson defines the body parameters. See documentation.
  159. func (s *TermvectorsService) BodyJson(body interface{}) *TermvectorsService {
  160. s.bodyJson = body
  161. return s
  162. }
  163. // BodyString defines the body parameters as a string. See documentation.
  164. func (s *TermvectorsService) BodyString(body string) *TermvectorsService {
  165. s.bodyString = body
  166. return s
  167. }
  168. // buildURL builds the URL for the operation.
  169. func (s *TermvectorsService) buildURL() (string, url.Values, error) {
  170. var pathParam = map[string]string{
  171. "index": s.index,
  172. "type": s.typ,
  173. }
  174. var path string
  175. var err error
  176. // Build URL
  177. if s.id != "" {
  178. pathParam["id"] = s.id
  179. path, err = uritemplates.Expand("/{index}/{type}/{id}/_termvectors", pathParam)
  180. } else {
  181. path, err = uritemplates.Expand("/{index}/{type}/_termvectors", pathParam)
  182. }
  183. if err != nil {
  184. return "", url.Values{}, err
  185. }
  186. // Add query string parameters
  187. params := url.Values{}
  188. if s.pretty {
  189. params.Set("pretty", "1")
  190. }
  191. if s.dfs != nil {
  192. params.Set("dfs", fmt.Sprintf("%v", *s.dfs))
  193. }
  194. if s.fieldStatistics != nil {
  195. params.Set("field_statistics", fmt.Sprintf("%v", *s.fieldStatistics))
  196. }
  197. if len(s.fields) > 0 {
  198. params.Set("fields", strings.Join(s.fields, ","))
  199. }
  200. if s.offsets != nil {
  201. params.Set("offsets", fmt.Sprintf("%v", *s.offsets))
  202. }
  203. if s.parent != "" {
  204. params.Set("parent", s.parent)
  205. }
  206. if s.payloads != nil {
  207. params.Set("payloads", fmt.Sprintf("%v", *s.payloads))
  208. }
  209. if s.positions != nil {
  210. params.Set("positions", fmt.Sprintf("%v", *s.positions))
  211. }
  212. if s.preference != "" {
  213. params.Set("preference", s.preference)
  214. }
  215. if s.realtime != nil {
  216. params.Set("realtime", fmt.Sprintf("%v", *s.realtime))
  217. }
  218. if s.routing != "" {
  219. params.Set("routing", s.routing)
  220. }
  221. if s.termStatistics != nil {
  222. params.Set("term_statistics", fmt.Sprintf("%v", *s.termStatistics))
  223. }
  224. if s.version != nil {
  225. params.Set("version", fmt.Sprintf("%v", s.version))
  226. }
  227. if s.versionType != "" {
  228. params.Set("version_type", s.versionType)
  229. }
  230. return path, params, nil
  231. }
  232. // Validate checks if the operation is valid.
  233. func (s *TermvectorsService) Validate() error {
  234. var invalid []string
  235. if s.index == "" {
  236. invalid = append(invalid, "Index")
  237. }
  238. if s.typ == "" {
  239. invalid = append(invalid, "Type")
  240. }
  241. if len(invalid) > 0 {
  242. return fmt.Errorf("missing required fields: %v", invalid)
  243. }
  244. return nil
  245. }
  246. // Do executes the operation.
  247. func (s *TermvectorsService) Do(ctx context.Context) (*TermvectorsResponse, error) {
  248. // Check pre-conditions
  249. if err := s.Validate(); err != nil {
  250. return nil, err
  251. }
  252. // Get URL for request
  253. path, params, err := s.buildURL()
  254. if err != nil {
  255. return nil, err
  256. }
  257. // Setup HTTP request body
  258. var body interface{}
  259. if s.bodyJson != nil {
  260. body = s.bodyJson
  261. } else if s.bodyString != "" {
  262. body = s.bodyString
  263. } else {
  264. data := make(map[string]interface{})
  265. if s.doc != nil {
  266. data["doc"] = s.doc
  267. }
  268. if len(s.perFieldAnalyzer) > 0 {
  269. data["per_field_analyzer"] = s.perFieldAnalyzer
  270. }
  271. if s.filter != nil {
  272. src, err := s.filter.Source()
  273. if err != nil {
  274. return nil, err
  275. }
  276. data["filter"] = src
  277. }
  278. if len(data) > 0 {
  279. body = data
  280. }
  281. }
  282. // Get HTTP response
  283. res, err := s.client.PerformRequest(ctx, "GET", path, params, body)
  284. if err != nil {
  285. return nil, err
  286. }
  287. // Return operation response
  288. ret := new(TermvectorsResponse)
  289. if err := s.client.decoder.Decode(res.Body, ret); err != nil {
  290. return nil, err
  291. }
  292. return ret, nil
  293. }
  294. // -- Filter settings --
  295. // TermvectorsFilterSettings adds additional filters to a Termsvector request.
  296. // It allows to filter terms based on their tf-idf scores.
  297. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/docs-termvectors.html#_terms_filtering
  298. // for more information.
  299. type TermvectorsFilterSettings struct {
  300. maxNumTerms *int64
  301. minTermFreq *int64
  302. maxTermFreq *int64
  303. minDocFreq *int64
  304. maxDocFreq *int64
  305. minWordLength *int64
  306. maxWordLength *int64
  307. }
  308. // NewTermvectorsFilterSettings creates and initializes a new TermvectorsFilterSettings struct.
  309. func NewTermvectorsFilterSettings() *TermvectorsFilterSettings {
  310. return &TermvectorsFilterSettings{}
  311. }
  312. // MaxNumTerms specifies the maximum number of terms the must be returned per field.
  313. func (fs *TermvectorsFilterSettings) MaxNumTerms(value int64) *TermvectorsFilterSettings {
  314. fs.maxNumTerms = &value
  315. return fs
  316. }
  317. // MinTermFreq ignores words with less than this frequency in the source doc.
  318. func (fs *TermvectorsFilterSettings) MinTermFreq(value int64) *TermvectorsFilterSettings {
  319. fs.minTermFreq = &value
  320. return fs
  321. }
  322. // MaxTermFreq ignores words with more than this frequency in the source doc.
  323. func (fs *TermvectorsFilterSettings) MaxTermFreq(value int64) *TermvectorsFilterSettings {
  324. fs.maxTermFreq = &value
  325. return fs
  326. }
  327. // MinDocFreq ignores terms which do not occur in at least this many docs.
  328. func (fs *TermvectorsFilterSettings) MinDocFreq(value int64) *TermvectorsFilterSettings {
  329. fs.minDocFreq = &value
  330. return fs
  331. }
  332. // MaxDocFreq ignores terms which occur in more than this many docs.
  333. func (fs *TermvectorsFilterSettings) MaxDocFreq(value int64) *TermvectorsFilterSettings {
  334. fs.maxDocFreq = &value
  335. return fs
  336. }
  337. // MinWordLength specifies the minimum word length below which words will be ignored.
  338. func (fs *TermvectorsFilterSettings) MinWordLength(value int64) *TermvectorsFilterSettings {
  339. fs.minWordLength = &value
  340. return fs
  341. }
  342. // MaxWordLength specifies the maximum word length above which words will be ignored.
  343. func (fs *TermvectorsFilterSettings) MaxWordLength(value int64) *TermvectorsFilterSettings {
  344. fs.maxWordLength = &value
  345. return fs
  346. }
  347. // Source returns JSON for the query.
  348. func (fs *TermvectorsFilterSettings) Source() (interface{}, error) {
  349. source := make(map[string]interface{})
  350. if fs.maxNumTerms != nil {
  351. source["max_num_terms"] = *fs.maxNumTerms
  352. }
  353. if fs.minTermFreq != nil {
  354. source["min_term_freq"] = *fs.minTermFreq
  355. }
  356. if fs.maxTermFreq != nil {
  357. source["max_term_freq"] = *fs.maxTermFreq
  358. }
  359. if fs.minDocFreq != nil {
  360. source["min_doc_freq"] = *fs.minDocFreq
  361. }
  362. if fs.maxDocFreq != nil {
  363. source["max_doc_freq"] = *fs.maxDocFreq
  364. }
  365. if fs.minWordLength != nil {
  366. source["min_word_length"] = *fs.minWordLength
  367. }
  368. if fs.maxWordLength != nil {
  369. source["max_word_length"] = *fs.maxWordLength
  370. }
  371. return source, nil
  372. }
  373. // -- Response types --
// TokenInfo is one entry of TermsInfo.Tokens. It carries the start and end
// offset, the position and the payload decoded from the "start_offset",
// "end_offset", "position" and "payload" JSON keys.
type TokenInfo struct {
	StartOffset int64  `json:"start_offset"`
	EndOffset   int64  `json:"end_offset"`
	Position    int64  `json:"position"`
	Payload     string `json:"payload"`
}
// TermsInfo is the value type of TermVectorsFieldInfo.Terms. It holds the
// statistics decoded for a single term ("doc_freq", "score", "term_freq",
// "ttf") together with its list of tokens.
type TermsInfo struct {
	DocFreq  int64       `json:"doc_freq"`
	Score    float64     `json:"score"`
	TermFreq int64       `json:"term_freq"`
	Ttf      int64       `json:"ttf"`
	Tokens   []TokenInfo `json:"tokens"`
}
// FieldStatistics holds the values decoded from the "field_statistics"
// object of a field: "doc_count", "sum_doc_freq" and "sum_ttf".
type FieldStatistics struct {
	DocCount   int64 `json:"doc_count"`
	SumDocFreq int64 `json:"sum_doc_freq"`
	SumTtf     int64 `json:"sum_ttf"`
}
// TermVectorsFieldInfo is the value type of TermvectorsResponse.TermVectors.
// It combines the field statistics with the "terms" object of the response.
// NOTE(review): the Terms map is presumably keyed by the term text —
// confirm against the Elasticsearch response format.
type TermVectorsFieldInfo struct {
	FieldStatistics FieldStatistics      `json:"field_statistics"`
	Terms           map[string]TermsInfo `json:"terms"`
}
// TermvectorsResponse is the response of TermvectorsService.Do. Its fields
// are decoded from the JSON keys named in the struct tags.
// NOTE(review): the TermVectors map is presumably keyed by field name —
// confirm against the Elasticsearch response format.
type TermvectorsResponse struct {
	Index       string                          `json:"_index"`
	Type        string                          `json:"_type"`
	Id          string                          `json:"_id,omitempty"`
	Version     int                             `json:"_version"`
	Found       bool                            `json:"found"`
	Took        int64                           `json:"took"`
	TermVectors map[string]TermVectorsFieldInfo `json:"term_vectors"`
}