search_aggs_bucket_significant_terms.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. // Copyright 2012-present Oliver Eilhard. All rights reserved.
  2. // Use of this source code is governed by a MIT-license.
  3. // See http://olivere.mit-license.org/license.txt for details.
  4. package elastic
  5. // SignificantSignificantTermsAggregation is an aggregation that returns interesting
  6. // or unusual occurrences of terms in a set.
  7. // See: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html
  8. type SignificantTermsAggregation struct {
  9. field string
  10. subAggregations map[string]Aggregation
  11. meta map[string]interface{}
  12. minDocCount *int
  13. shardMinDocCount *int
  14. requiredSize *int
  15. shardSize *int
  16. filter Query
  17. executionHint string
  18. significanceHeuristic SignificanceHeuristic
  19. }
  20. func NewSignificantTermsAggregation() *SignificantTermsAggregation {
  21. return &SignificantTermsAggregation{
  22. subAggregations: make(map[string]Aggregation, 0),
  23. }
  24. }
  25. func (a *SignificantTermsAggregation) Field(field string) *SignificantTermsAggregation {
  26. a.field = field
  27. return a
  28. }
  29. func (a *SignificantTermsAggregation) SubAggregation(name string, subAggregation Aggregation) *SignificantTermsAggregation {
  30. a.subAggregations[name] = subAggregation
  31. return a
  32. }
  33. // Meta sets the meta data to be included in the aggregation response.
  34. func (a *SignificantTermsAggregation) Meta(metaData map[string]interface{}) *SignificantTermsAggregation {
  35. a.meta = metaData
  36. return a
  37. }
  38. func (a *SignificantTermsAggregation) MinDocCount(minDocCount int) *SignificantTermsAggregation {
  39. a.minDocCount = &minDocCount
  40. return a
  41. }
  42. func (a *SignificantTermsAggregation) ShardMinDocCount(shardMinDocCount int) *SignificantTermsAggregation {
  43. a.shardMinDocCount = &shardMinDocCount
  44. return a
  45. }
  46. func (a *SignificantTermsAggregation) RequiredSize(requiredSize int) *SignificantTermsAggregation {
  47. a.requiredSize = &requiredSize
  48. return a
  49. }
  50. func (a *SignificantTermsAggregation) ShardSize(shardSize int) *SignificantTermsAggregation {
  51. a.shardSize = &shardSize
  52. return a
  53. }
  54. func (a *SignificantTermsAggregation) BackgroundFilter(filter Query) *SignificantTermsAggregation {
  55. a.filter = filter
  56. return a
  57. }
  58. func (a *SignificantTermsAggregation) ExecutionHint(hint string) *SignificantTermsAggregation {
  59. a.executionHint = hint
  60. return a
  61. }
  62. func (a *SignificantTermsAggregation) SignificanceHeuristic(heuristic SignificanceHeuristic) *SignificantTermsAggregation {
  63. a.significanceHeuristic = heuristic
  64. return a
  65. }
  66. func (a *SignificantTermsAggregation) Source() (interface{}, error) {
  67. // Example:
  68. // {
  69. // "query" : {
  70. // "terms" : {"force" : [ "British Transport Police" ]}
  71. // },
  72. // "aggregations" : {
  73. // "significantCrimeTypes" : {
  74. // "significant_terms" : { "field" : "crime_type" }
  75. // }
  76. // }
  77. // }
  78. //
  79. // This method returns only the
  80. // { "significant_terms" : { "field" : "crime_type" }
  81. // part.
  82. source := make(map[string]interface{})
  83. opts := make(map[string]interface{})
  84. source["significant_terms"] = opts
  85. if a.field != "" {
  86. opts["field"] = a.field
  87. }
  88. if a.requiredSize != nil {
  89. opts["size"] = *a.requiredSize // not a typo!
  90. }
  91. if a.shardSize != nil {
  92. opts["shard_size"] = *a.shardSize
  93. }
  94. if a.minDocCount != nil {
  95. opts["min_doc_count"] = *a.minDocCount
  96. }
  97. if a.shardMinDocCount != nil {
  98. opts["shard_min_doc_count"] = *a.shardMinDocCount
  99. }
  100. if a.executionHint != "" {
  101. opts["execution_hint"] = a.executionHint
  102. }
  103. if a.filter != nil {
  104. src, err := a.filter.Source()
  105. if err != nil {
  106. return nil, err
  107. }
  108. opts["background_filter"] = src
  109. }
  110. if a.significanceHeuristic != nil {
  111. name := a.significanceHeuristic.Name()
  112. src, err := a.significanceHeuristic.Source()
  113. if err != nil {
  114. return nil, err
  115. }
  116. opts[name] = src
  117. }
  118. // AggregationBuilder (SubAggregations)
  119. if len(a.subAggregations) > 0 {
  120. aggsMap := make(map[string]interface{})
  121. source["aggregations"] = aggsMap
  122. for name, aggregate := range a.subAggregations {
  123. src, err := aggregate.Source()
  124. if err != nil {
  125. return nil, err
  126. }
  127. aggsMap[name] = src
  128. }
  129. }
  130. // Add Meta data if available
  131. if len(a.meta) > 0 {
  132. source["meta"] = a.meta
  133. }
  134. return source, nil
  135. }
  136. // -- Significance heuristics --
  137. type SignificanceHeuristic interface {
  138. Name() string
  139. Source() (interface{}, error)
  140. }
  141. // -- Chi Square --
  142. // ChiSquareSignificanceHeuristic implements Chi square as described
  143. // in "Information Retrieval", Manning et al., Chapter 13.5.2.
  144. //
  145. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_chi_square
  146. // for details.
  147. type ChiSquareSignificanceHeuristic struct {
  148. backgroundIsSuperset *bool
  149. includeNegatives *bool
  150. }
  151. // NewChiSquareSignificanceHeuristic initializes a new ChiSquareSignificanceHeuristic.
  152. func NewChiSquareSignificanceHeuristic() *ChiSquareSignificanceHeuristic {
  153. return &ChiSquareSignificanceHeuristic{}
  154. }
  155. // Name returns the name of the heuristic in the REST interface.
  156. func (sh *ChiSquareSignificanceHeuristic) Name() string {
  157. return "chi_square"
  158. }
  159. // BackgroundIsSuperset indicates whether you defined a custom background
  160. // filter that represents a difference set of documents that you want to
  161. // compare to.
  162. func (sh *ChiSquareSignificanceHeuristic) BackgroundIsSuperset(backgroundIsSuperset bool) *ChiSquareSignificanceHeuristic {
  163. sh.backgroundIsSuperset = &backgroundIsSuperset
  164. return sh
  165. }
  166. // IncludeNegatives indicates whether to filter out the terms that appear
  167. // much less in the subset than in the background without the subset.
  168. func (sh *ChiSquareSignificanceHeuristic) IncludeNegatives(includeNegatives bool) *ChiSquareSignificanceHeuristic {
  169. sh.includeNegatives = &includeNegatives
  170. return sh
  171. }
  172. // Source returns the parameters that need to be added to the REST parameters.
  173. func (sh *ChiSquareSignificanceHeuristic) Source() (interface{}, error) {
  174. source := make(map[string]interface{})
  175. if sh.backgroundIsSuperset != nil {
  176. source["background_is_superset"] = *sh.backgroundIsSuperset
  177. }
  178. if sh.includeNegatives != nil {
  179. source["include_negatives"] = *sh.includeNegatives
  180. }
  181. return source, nil
  182. }
  183. // -- GND --
  184. // GNDSignificanceHeuristic implements the "Google Normalized Distance"
  185. // as described in "The Google Similarity Distance", Cilibrasi and Vitanyi,
  186. // 2007.
  187. //
  188. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_google_normalized_distance
  189. // for details.
  190. type GNDSignificanceHeuristic struct {
  191. backgroundIsSuperset *bool
  192. }
  193. // NewGNDSignificanceHeuristic implements a new GNDSignificanceHeuristic.
  194. func NewGNDSignificanceHeuristic() *GNDSignificanceHeuristic {
  195. return &GNDSignificanceHeuristic{}
  196. }
  197. // Name returns the name of the heuristic in the REST interface.
  198. func (sh *GNDSignificanceHeuristic) Name() string {
  199. return "gnd"
  200. }
  201. // BackgroundIsSuperset indicates whether you defined a custom background
  202. // filter that represents a difference set of documents that you want to
  203. // compare to.
  204. func (sh *GNDSignificanceHeuristic) BackgroundIsSuperset(backgroundIsSuperset bool) *GNDSignificanceHeuristic {
  205. sh.backgroundIsSuperset = &backgroundIsSuperset
  206. return sh
  207. }
  208. // Source returns the parameters that need to be added to the REST parameters.
  209. func (sh *GNDSignificanceHeuristic) Source() (interface{}, error) {
  210. source := make(map[string]interface{})
  211. if sh.backgroundIsSuperset != nil {
  212. source["background_is_superset"] = *sh.backgroundIsSuperset
  213. }
  214. return source, nil
  215. }
  216. // -- JLH Score --
  217. // JLHScoreSignificanceHeuristic implements the JLH score as described in
  218. // https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_jlh_score.
  219. type JLHScoreSignificanceHeuristic struct{}
  220. // NewJLHScoreSignificanceHeuristic initializes a new JLHScoreSignificanceHeuristic.
  221. func NewJLHScoreSignificanceHeuristic() *JLHScoreSignificanceHeuristic {
  222. return &JLHScoreSignificanceHeuristic{}
  223. }
  224. // Name returns the name of the heuristic in the REST interface.
  225. func (sh *JLHScoreSignificanceHeuristic) Name() string {
  226. return "jlh"
  227. }
  228. // Source returns the parameters that need to be added to the REST parameters.
  229. func (sh *JLHScoreSignificanceHeuristic) Source() (interface{}, error) {
  230. source := make(map[string]interface{})
  231. return source, nil
  232. }
  233. // -- Mutual Information --
  234. // MutualInformationSignificanceHeuristic implements Mutual information
  235. // as described in "Information Retrieval", Manning et al., Chapter 13.5.1.
  236. //
  237. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_mutual_information
  238. // for details.
  239. type MutualInformationSignificanceHeuristic struct {
  240. backgroundIsSuperset *bool
  241. includeNegatives *bool
  242. }
  243. // NewMutualInformationSignificanceHeuristic initializes a new instance of
  244. // MutualInformationSignificanceHeuristic.
  245. func NewMutualInformationSignificanceHeuristic() *MutualInformationSignificanceHeuristic {
  246. return &MutualInformationSignificanceHeuristic{}
  247. }
  248. // Name returns the name of the heuristic in the REST interface.
  249. func (sh *MutualInformationSignificanceHeuristic) Name() string {
  250. return "mutual_information"
  251. }
  252. // BackgroundIsSuperset indicates whether you defined a custom background
  253. // filter that represents a difference set of documents that you want to
  254. // compare to.
  255. func (sh *MutualInformationSignificanceHeuristic) BackgroundIsSuperset(backgroundIsSuperset bool) *MutualInformationSignificanceHeuristic {
  256. sh.backgroundIsSuperset = &backgroundIsSuperset
  257. return sh
  258. }
  259. // IncludeNegatives indicates whether to filter out the terms that appear
  260. // much less in the subset than in the background without the subset.
  261. func (sh *MutualInformationSignificanceHeuristic) IncludeNegatives(includeNegatives bool) *MutualInformationSignificanceHeuristic {
  262. sh.includeNegatives = &includeNegatives
  263. return sh
  264. }
  265. // Source returns the parameters that need to be added to the REST parameters.
  266. func (sh *MutualInformationSignificanceHeuristic) Source() (interface{}, error) {
  267. source := make(map[string]interface{})
  268. if sh.backgroundIsSuperset != nil {
  269. source["background_is_superset"] = *sh.backgroundIsSuperset
  270. }
  271. if sh.includeNegatives != nil {
  272. source["include_negatives"] = *sh.includeNegatives
  273. }
  274. return source, nil
  275. }
  276. // -- Percentage Score --
  277. // PercentageScoreSignificanceHeuristic implements the algorithm described
  278. // in https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_percentage.
  279. type PercentageScoreSignificanceHeuristic struct{}
  280. // NewPercentageScoreSignificanceHeuristic initializes a new instance of
  281. // PercentageScoreSignificanceHeuristic.
  282. func NewPercentageScoreSignificanceHeuristic() *PercentageScoreSignificanceHeuristic {
  283. return &PercentageScoreSignificanceHeuristic{}
  284. }
  285. // Name returns the name of the heuristic in the REST interface.
  286. func (sh *PercentageScoreSignificanceHeuristic) Name() string {
  287. return "percentage"
  288. }
  289. // Source returns the parameters that need to be added to the REST parameters.
  290. func (sh *PercentageScoreSignificanceHeuristic) Source() (interface{}, error) {
  291. source := make(map[string]interface{})
  292. return source, nil
  293. }
  294. // -- Script --
  295. // ScriptSignificanceHeuristic implements a scripted significance heuristic.
  296. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_scripted
  297. // for details.
  298. type ScriptSignificanceHeuristic struct {
  299. script *Script
  300. }
  301. // NewScriptSignificanceHeuristic initializes a new instance of
  302. // ScriptSignificanceHeuristic.
  303. func NewScriptSignificanceHeuristic() *ScriptSignificanceHeuristic {
  304. return &ScriptSignificanceHeuristic{}
  305. }
  306. // Name returns the name of the heuristic in the REST interface.
  307. func (sh *ScriptSignificanceHeuristic) Name() string {
  308. return "script_heuristic"
  309. }
  310. // Script specifies the script to use to get custom scores. The following
  311. // parameters are available in the script: `_subset_freq`, `_superset_freq`,
  312. // `_subset_size`, and `_superset_size`.
  313. //
  314. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_scripted
  315. // for details.
  316. func (sh *ScriptSignificanceHeuristic) Script(script *Script) *ScriptSignificanceHeuristic {
  317. sh.script = script
  318. return sh
  319. }
  320. // Source returns the parameters that need to be added to the REST parameters.
  321. func (sh *ScriptSignificanceHeuristic) Source() (interface{}, error) {
  322. source := make(map[string]interface{})
  323. if sh.script != nil {
  324. src, err := sh.script.Source()
  325. if err != nil {
  326. return nil, err
  327. }
  328. source["script"] = src
  329. }
  330. return source, nil
  331. }