123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389 |
- // Copyright 2012-present Oliver Eilhard. All rights reserved.
- // Use of this source code is governed by a MIT-license.
- // See http://olivere.mit-license.org/license.txt for details.
- package elastic
- // SignificantSignificantTermsAggregation is an aggregation that returns interesting
- // or unusual occurrences of terms in a set.
- // See: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html
- type SignificantTermsAggregation struct {
- field string
- subAggregations map[string]Aggregation
- meta map[string]interface{}
- minDocCount *int
- shardMinDocCount *int
- requiredSize *int
- shardSize *int
- filter Query
- executionHint string
- significanceHeuristic SignificanceHeuristic
- }
- func NewSignificantTermsAggregation() *SignificantTermsAggregation {
- return &SignificantTermsAggregation{
- subAggregations: make(map[string]Aggregation, 0),
- }
- }
- func (a *SignificantTermsAggregation) Field(field string) *SignificantTermsAggregation {
- a.field = field
- return a
- }
- func (a *SignificantTermsAggregation) SubAggregation(name string, subAggregation Aggregation) *SignificantTermsAggregation {
- a.subAggregations[name] = subAggregation
- return a
- }
- // Meta sets the meta data to be included in the aggregation response.
- func (a *SignificantTermsAggregation) Meta(metaData map[string]interface{}) *SignificantTermsAggregation {
- a.meta = metaData
- return a
- }
- func (a *SignificantTermsAggregation) MinDocCount(minDocCount int) *SignificantTermsAggregation {
- a.minDocCount = &minDocCount
- return a
- }
- func (a *SignificantTermsAggregation) ShardMinDocCount(shardMinDocCount int) *SignificantTermsAggregation {
- a.shardMinDocCount = &shardMinDocCount
- return a
- }
- func (a *SignificantTermsAggregation) RequiredSize(requiredSize int) *SignificantTermsAggregation {
- a.requiredSize = &requiredSize
- return a
- }
- func (a *SignificantTermsAggregation) ShardSize(shardSize int) *SignificantTermsAggregation {
- a.shardSize = &shardSize
- return a
- }
- func (a *SignificantTermsAggregation) BackgroundFilter(filter Query) *SignificantTermsAggregation {
- a.filter = filter
- return a
- }
- func (a *SignificantTermsAggregation) ExecutionHint(hint string) *SignificantTermsAggregation {
- a.executionHint = hint
- return a
- }
- func (a *SignificantTermsAggregation) SignificanceHeuristic(heuristic SignificanceHeuristic) *SignificantTermsAggregation {
- a.significanceHeuristic = heuristic
- return a
- }
- func (a *SignificantTermsAggregation) Source() (interface{}, error) {
- // Example:
- // {
- // "query" : {
- // "terms" : {"force" : [ "British Transport Police" ]}
- // },
- // "aggregations" : {
- // "significantCrimeTypes" : {
- // "significant_terms" : { "field" : "crime_type" }
- // }
- // }
- // }
- //
- // This method returns only the
- // { "significant_terms" : { "field" : "crime_type" }
- // part.
- source := make(map[string]interface{})
- opts := make(map[string]interface{})
- source["significant_terms"] = opts
- if a.field != "" {
- opts["field"] = a.field
- }
- if a.requiredSize != nil {
- opts["size"] = *a.requiredSize // not a typo!
- }
- if a.shardSize != nil {
- opts["shard_size"] = *a.shardSize
- }
- if a.minDocCount != nil {
- opts["min_doc_count"] = *a.minDocCount
- }
- if a.shardMinDocCount != nil {
- opts["shard_min_doc_count"] = *a.shardMinDocCount
- }
- if a.executionHint != "" {
- opts["execution_hint"] = a.executionHint
- }
- if a.filter != nil {
- src, err := a.filter.Source()
- if err != nil {
- return nil, err
- }
- opts["background_filter"] = src
- }
- if a.significanceHeuristic != nil {
- name := a.significanceHeuristic.Name()
- src, err := a.significanceHeuristic.Source()
- if err != nil {
- return nil, err
- }
- opts[name] = src
- }
- // AggregationBuilder (SubAggregations)
- if len(a.subAggregations) > 0 {
- aggsMap := make(map[string]interface{})
- source["aggregations"] = aggsMap
- for name, aggregate := range a.subAggregations {
- src, err := aggregate.Source()
- if err != nil {
- return nil, err
- }
- aggsMap[name] = src
- }
- }
- // Add Meta data if available
- if len(a.meta) > 0 {
- source["meta"] = a.meta
- }
- return source, nil
- }
// -- Significance heuristics --

// SignificanceHeuristic describes a scoring heuristic that can be attached
// to a SignificantTermsAggregation.
type SignificanceHeuristic interface {
	// Source returns the parameters of the heuristic, or an error if they
	// cannot be produced.
	Source() (interface{}, error)

	// Name returns the key under which the heuristic's source is placed
	// in the aggregation options.
	Name() string
}
// -- Chi Square --

// ChiSquareSignificanceHeuristic implements Chi square as described
// in "Information Retrieval", Manning et al., Chapter 13.5.2.
//
// Both options are optional; an option that was never set is not emitted
// by Source.
//
// See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_chi_square
// for details.
type ChiSquareSignificanceHeuristic struct {
	backgroundIsSuperset *bool
	includeNegatives     *bool
}

// NewChiSquareSignificanceHeuristic returns a ChiSquareSignificanceHeuristic
// with no options set.
func NewChiSquareSignificanceHeuristic() *ChiSquareSignificanceHeuristic {
	return new(ChiSquareSignificanceHeuristic)
}

// Name reports the key used for this heuristic in the REST interface.
func (sh *ChiSquareSignificanceHeuristic) Name() string {
	return "chi_square"
}

// BackgroundIsSuperset sets the "background_is_superset" option. It relates
// to whether you defined a custom background filter that represents a
// different set of documents that you want to compare to.
func (sh *ChiSquareSignificanceHeuristic) BackgroundIsSuperset(backgroundIsSuperset bool) *ChiSquareSignificanceHeuristic {
	value := backgroundIsSuperset
	sh.backgroundIsSuperset = &value
	return sh
}

// IncludeNegatives sets the "include_negatives" option, which controls
// whether terms that appear much less in the subset than in the background
// without the subset are filtered out.
func (sh *ChiSquareSignificanceHeuristic) IncludeNegatives(includeNegatives bool) *ChiSquareSignificanceHeuristic {
	value := includeNegatives
	sh.includeNegatives = &value
	return sh
}

// Source builds the parameter map to be added to the REST request. The
// returned error is always nil.
func (sh *ChiSquareSignificanceHeuristic) Source() (interface{}, error) {
	params := map[string]interface{}{}
	if v := sh.backgroundIsSuperset; v != nil {
		params["background_is_superset"] = *v
	}
	if v := sh.includeNegatives; v != nil {
		params["include_negatives"] = *v
	}
	return params, nil
}
// -- GND --

// GNDSignificanceHeuristic implements the "Google Normalized Distance"
// as described in "The Google Similarity Distance", Cilibrasi and Vitanyi,
// 2007.
//
// See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_google_normalized_distance
// for details.
type GNDSignificanceHeuristic struct {
	backgroundIsSuperset *bool
}

// NewGNDSignificanceHeuristic returns a GNDSignificanceHeuristic with no
// options set.
func NewGNDSignificanceHeuristic() *GNDSignificanceHeuristic {
	return new(GNDSignificanceHeuristic)
}

// Name reports the key used for this heuristic in the REST interface.
func (sh *GNDSignificanceHeuristic) Name() string {
	return "gnd"
}

// BackgroundIsSuperset sets the "background_is_superset" option. It relates
// to whether you defined a custom background filter that represents a
// different set of documents that you want to compare to.
func (sh *GNDSignificanceHeuristic) BackgroundIsSuperset(backgroundIsSuperset bool) *GNDSignificanceHeuristic {
	value := backgroundIsSuperset
	sh.backgroundIsSuperset = &value
	return sh
}

// Source builds the parameter map to be added to the REST request. The
// option is emitted only if it was set; the returned error is always nil.
func (sh *GNDSignificanceHeuristic) Source() (interface{}, error) {
	params := map[string]interface{}{}
	if v := sh.backgroundIsSuperset; v != nil {
		params["background_is_superset"] = *v
	}
	return params, nil
}
// -- JLH Score --

// JLHScoreSignificanceHeuristic implements the JLH score as described in
// https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_jlh_score.
// It has no configurable options.
type JLHScoreSignificanceHeuristic struct{}

// NewJLHScoreSignificanceHeuristic returns a new JLHScoreSignificanceHeuristic.
func NewJLHScoreSignificanceHeuristic() *JLHScoreSignificanceHeuristic {
	return new(JLHScoreSignificanceHeuristic)
}

// Name reports the key used for this heuristic in the REST interface.
func (sh *JLHScoreSignificanceHeuristic) Name() string {
	return "jlh"
}

// Source returns an empty parameter map, since the heuristic takes no
// options. The returned error is always nil.
func (sh *JLHScoreSignificanceHeuristic) Source() (interface{}, error) {
	return map[string]interface{}{}, nil
}
// -- Mutual Information --

// MutualInformationSignificanceHeuristic implements Mutual information
// as described in "Information Retrieval", Manning et al., Chapter 13.5.1.
//
// Both options are optional; an option that was never set is not emitted
// by Source.
//
// See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_mutual_information
// for details.
type MutualInformationSignificanceHeuristic struct {
	backgroundIsSuperset *bool
	includeNegatives     *bool
}

// NewMutualInformationSignificanceHeuristic returns a
// MutualInformationSignificanceHeuristic with no options set.
func NewMutualInformationSignificanceHeuristic() *MutualInformationSignificanceHeuristic {
	return new(MutualInformationSignificanceHeuristic)
}

// Name reports the key used for this heuristic in the REST interface.
func (sh *MutualInformationSignificanceHeuristic) Name() string {
	return "mutual_information"
}

// BackgroundIsSuperset sets the "background_is_superset" option. It relates
// to whether you defined a custom background filter that represents a
// different set of documents that you want to compare to.
func (sh *MutualInformationSignificanceHeuristic) BackgroundIsSuperset(backgroundIsSuperset bool) *MutualInformationSignificanceHeuristic {
	value := backgroundIsSuperset
	sh.backgroundIsSuperset = &value
	return sh
}

// IncludeNegatives sets the "include_negatives" option, which controls
// whether terms that appear much less in the subset than in the background
// without the subset are filtered out.
func (sh *MutualInformationSignificanceHeuristic) IncludeNegatives(includeNegatives bool) *MutualInformationSignificanceHeuristic {
	value := includeNegatives
	sh.includeNegatives = &value
	return sh
}

// Source builds the parameter map to be added to the REST request. The
// returned error is always nil.
func (sh *MutualInformationSignificanceHeuristic) Source() (interface{}, error) {
	params := map[string]interface{}{}
	if v := sh.backgroundIsSuperset; v != nil {
		params["background_is_superset"] = *v
	}
	if v := sh.includeNegatives; v != nil {
		params["include_negatives"] = *v
	}
	return params, nil
}
// -- Percentage Score --

// PercentageScoreSignificanceHeuristic implements the algorithm described
// in https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_percentage.
// It has no configurable options.
type PercentageScoreSignificanceHeuristic struct{}

// NewPercentageScoreSignificanceHeuristic returns a new
// PercentageScoreSignificanceHeuristic.
func NewPercentageScoreSignificanceHeuristic() *PercentageScoreSignificanceHeuristic {
	return new(PercentageScoreSignificanceHeuristic)
}

// Name reports the key used for this heuristic in the REST interface.
func (sh *PercentageScoreSignificanceHeuristic) Name() string {
	return "percentage"
}

// Source returns an empty parameter map, since the heuristic takes no
// options. The returned error is always nil.
func (sh *PercentageScoreSignificanceHeuristic) Source() (interface{}, error) {
	return map[string]interface{}{}, nil
}
- // -- Script --
- // ScriptSignificanceHeuristic implements a scripted significance heuristic.
- // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_scripted
- // for details.
- type ScriptSignificanceHeuristic struct {
- script *Script
- }
- // NewScriptSignificanceHeuristic initializes a new instance of
- // ScriptSignificanceHeuristic.
- func NewScriptSignificanceHeuristic() *ScriptSignificanceHeuristic {
- return &ScriptSignificanceHeuristic{}
- }
- // Name returns the name of the heuristic in the REST interface.
- func (sh *ScriptSignificanceHeuristic) Name() string {
- return "script_heuristic"
- }
- // Script specifies the script to use to get custom scores. The following
- // parameters are available in the script: `_subset_freq`, `_superset_freq`,
- // `_subset_size`, and `_superset_size`.
- //
- // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-significantterms-aggregation.html#_scripted
- // for details.
- func (sh *ScriptSignificanceHeuristic) Script(script *Script) *ScriptSignificanceHeuristic {
- sh.script = script
- return sh
- }
- // Source returns the parameters that need to be added to the REST parameters.
- func (sh *ScriptSignificanceHeuristic) Source() (interface{}, error) {
- source := make(map[string]interface{})
- if sh.script != nil {
- src, err := sh.script.Source()
- if err != nil {
- return nil, err
- }
- source["script"] = src
- }
- return source, nil
- }
|