123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459 |
- // Copyright 2012-present Oliver Eilhard. All rights reserved.
- // Use of this source code is governed by a MIT-license.
- // See http://olivere.mit-license.org/license.txt for details.
- package elastic
- import (
- "context"
- "fmt"
- "net/url"
- "strings"
- "gopkg.in/olivere/elastic.v5/uritemplates"
- )
- // TermvectorsService returns information and statistics on terms in the
- // fields of a particular document. The document could be stored in the
- // index or artificially provided by the user.
- //
- // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/docs-termvectors.html
- // for documentation.
- type TermvectorsService struct {
- client *Client
- pretty bool
- id string
- index string
- typ string
- dfs *bool
- doc interface{}
- fieldStatistics *bool
- fields []string
- filter *TermvectorsFilterSettings
- perFieldAnalyzer map[string]string
- offsets *bool
- parent string
- payloads *bool
- positions *bool
- preference string
- realtime *bool
- routing string
- termStatistics *bool
- version interface{}
- versionType string
- bodyJson interface{}
- bodyString string
- }
- // NewTermvectorsService creates a new TermvectorsService.
- func NewTermvectorsService(client *Client) *TermvectorsService {
- return &TermvectorsService{
- client: client,
- }
- }
- // Index in which the document resides.
- func (s *TermvectorsService) Index(index string) *TermvectorsService {
- s.index = index
- return s
- }
- // Type of the document.
- func (s *TermvectorsService) Type(typ string) *TermvectorsService {
- s.typ = typ
- return s
- }
- // Id of the document.
- func (s *TermvectorsService) Id(id string) *TermvectorsService {
- s.id = id
- return s
- }
- // Dfs specifies if distributed frequencies should be returned instead
- // shard frequencies.
- func (s *TermvectorsService) Dfs(dfs bool) *TermvectorsService {
- s.dfs = &dfs
- return s
- }
- // Doc is the document to analyze.
- func (s *TermvectorsService) Doc(doc interface{}) *TermvectorsService {
- s.doc = doc
- return s
- }
- // FieldStatistics specifies if document count, sum of document frequencies
- // and sum of total term frequencies should be returned.
- func (s *TermvectorsService) FieldStatistics(fieldStatistics bool) *TermvectorsService {
- s.fieldStatistics = &fieldStatistics
- return s
- }
- // Fields a list of fields to return.
- func (s *TermvectorsService) Fields(fields ...string) *TermvectorsService {
- if s.fields == nil {
- s.fields = make([]string, 0)
- }
- s.fields = append(s.fields, fields...)
- return s
- }
- // Filter adds terms filter settings.
- func (s *TermvectorsService) Filter(filter *TermvectorsFilterSettings) *TermvectorsService {
- s.filter = filter
- return s
- }
- // PerFieldAnalyzer allows to specify a different analyzer than the one
- // at the field.
- func (s *TermvectorsService) PerFieldAnalyzer(perFieldAnalyzer map[string]string) *TermvectorsService {
- s.perFieldAnalyzer = perFieldAnalyzer
- return s
- }
- // Offsets specifies if term offsets should be returned.
- func (s *TermvectorsService) Offsets(offsets bool) *TermvectorsService {
- s.offsets = &offsets
- return s
- }
- // Parent id of documents.
- func (s *TermvectorsService) Parent(parent string) *TermvectorsService {
- s.parent = parent
- return s
- }
- // Payloads specifies if term payloads should be returned.
- func (s *TermvectorsService) Payloads(payloads bool) *TermvectorsService {
- s.payloads = &payloads
- return s
- }
- // Positions specifies if term positions should be returned.
- func (s *TermvectorsService) Positions(positions bool) *TermvectorsService {
- s.positions = &positions
- return s
- }
- // Preference specify the node or shard the operation
- // should be performed on (default: random).
- func (s *TermvectorsService) Preference(preference string) *TermvectorsService {
- s.preference = preference
- return s
- }
- // Realtime specifies if request is real-time as opposed to
- // near-real-time (default: true).
- func (s *TermvectorsService) Realtime(realtime bool) *TermvectorsService {
- s.realtime = &realtime
- return s
- }
- // Routing is a specific routing value.
- func (s *TermvectorsService) Routing(routing string) *TermvectorsService {
- s.routing = routing
- return s
- }
- // TermStatistics specifies if total term frequency and document frequency
- // should be returned.
- func (s *TermvectorsService) TermStatistics(termStatistics bool) *TermvectorsService {
- s.termStatistics = &termStatistics
- return s
- }
- // Version an explicit version number for concurrency control.
- func (s *TermvectorsService) Version(version interface{}) *TermvectorsService {
- s.version = version
- return s
- }
- // VersionType specifies a version type ("internal", "external", "external_gte", or "force").
- func (s *TermvectorsService) VersionType(versionType string) *TermvectorsService {
- s.versionType = versionType
- return s
- }
- // Pretty indicates that the JSON response be indented and human readable.
- func (s *TermvectorsService) Pretty(pretty bool) *TermvectorsService {
- s.pretty = pretty
- return s
- }
- // BodyJson defines the body parameters. See documentation.
- func (s *TermvectorsService) BodyJson(body interface{}) *TermvectorsService {
- s.bodyJson = body
- return s
- }
- // BodyString defines the body parameters as a string. See documentation.
- func (s *TermvectorsService) BodyString(body string) *TermvectorsService {
- s.bodyString = body
- return s
- }
- // buildURL builds the URL for the operation.
- func (s *TermvectorsService) buildURL() (string, url.Values, error) {
- var pathParam = map[string]string{
- "index": s.index,
- "type": s.typ,
- }
- var path string
- var err error
- // Build URL
- if s.id != "" {
- pathParam["id"] = s.id
- path, err = uritemplates.Expand("/{index}/{type}/{id}/_termvectors", pathParam)
- } else {
- path, err = uritemplates.Expand("/{index}/{type}/_termvectors", pathParam)
- }
- if err != nil {
- return "", url.Values{}, err
- }
- // Add query string parameters
- params := url.Values{}
- if s.pretty {
- params.Set("pretty", "1")
- }
- if s.dfs != nil {
- params.Set("dfs", fmt.Sprintf("%v", *s.dfs))
- }
- if s.fieldStatistics != nil {
- params.Set("field_statistics", fmt.Sprintf("%v", *s.fieldStatistics))
- }
- if len(s.fields) > 0 {
- params.Set("fields", strings.Join(s.fields, ","))
- }
- if s.offsets != nil {
- params.Set("offsets", fmt.Sprintf("%v", *s.offsets))
- }
- if s.parent != "" {
- params.Set("parent", s.parent)
- }
- if s.payloads != nil {
- params.Set("payloads", fmt.Sprintf("%v", *s.payloads))
- }
- if s.positions != nil {
- params.Set("positions", fmt.Sprintf("%v", *s.positions))
- }
- if s.preference != "" {
- params.Set("preference", s.preference)
- }
- if s.realtime != nil {
- params.Set("realtime", fmt.Sprintf("%v", *s.realtime))
- }
- if s.routing != "" {
- params.Set("routing", s.routing)
- }
- if s.termStatistics != nil {
- params.Set("term_statistics", fmt.Sprintf("%v", *s.termStatistics))
- }
- if s.version != nil {
- params.Set("version", fmt.Sprintf("%v", s.version))
- }
- if s.versionType != "" {
- params.Set("version_type", s.versionType)
- }
- return path, params, nil
- }
- // Validate checks if the operation is valid.
- func (s *TermvectorsService) Validate() error {
- var invalid []string
- if s.index == "" {
- invalid = append(invalid, "Index")
- }
- if s.typ == "" {
- invalid = append(invalid, "Type")
- }
- if len(invalid) > 0 {
- return fmt.Errorf("missing required fields: %v", invalid)
- }
- return nil
- }
- // Do executes the operation.
- func (s *TermvectorsService) Do(ctx context.Context) (*TermvectorsResponse, error) {
- // Check pre-conditions
- if err := s.Validate(); err != nil {
- return nil, err
- }
- // Get URL for request
- path, params, err := s.buildURL()
- if err != nil {
- return nil, err
- }
- // Setup HTTP request body
- var body interface{}
- if s.bodyJson != nil {
- body = s.bodyJson
- } else if s.bodyString != "" {
- body = s.bodyString
- } else {
- data := make(map[string]interface{})
- if s.doc != nil {
- data["doc"] = s.doc
- }
- if len(s.perFieldAnalyzer) > 0 {
- data["per_field_analyzer"] = s.perFieldAnalyzer
- }
- if s.filter != nil {
- src, err := s.filter.Source()
- if err != nil {
- return nil, err
- }
- data["filter"] = src
- }
- if len(data) > 0 {
- body = data
- }
- }
- // Get HTTP response
- res, err := s.client.PerformRequest(ctx, "GET", path, params, body)
- if err != nil {
- return nil, err
- }
- // Return operation response
- ret := new(TermvectorsResponse)
- if err := s.client.decoder.Decode(res.Body, ret); err != nil {
- return nil, err
- }
- return ret, nil
- }
- // -- Filter settings --
// TermvectorsFilterSettings adds additional filters to a term vectors request.
// It allows to filter terms based on their tf-idf scores.
// See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/docs-termvectors.html#_terms_filtering
// for more information.
//
// Every setting is optional. A setting that was never assigned is omitted
// from the output of Source; an explicitly assigned zero is still emitted.
type TermvectorsFilterSettings struct {
	// maxNumTerms is emitted as "max_num_terms".
	maxNumTerms *int64
	// minTermFreq and maxTermFreq are emitted as "min_term_freq" and "max_term_freq".
	minTermFreq, maxTermFreq *int64
	// minDocFreq and maxDocFreq are emitted as "min_doc_freq" and "max_doc_freq".
	minDocFreq, maxDocFreq *int64
	// minWordLength and maxWordLength are emitted as "min_word_length" and "max_word_length".
	minWordLength, maxWordLength *int64
}

// NewTermvectorsFilterSettings creates and initializes a new
// TermvectorsFilterSettings struct with no setting assigned.
func NewTermvectorsFilterSettings() *TermvectorsFilterSettings {
	return new(TermvectorsFilterSettings)
}

// MaxNumTerms specifies the maximum number of terms that must be returned per field.
func (f *TermvectorsFilterSettings) MaxNumTerms(value int64) *TermvectorsFilterSettings {
	f.maxNumTerms = &value
	return f
}

// MinTermFreq ignores words with less than this frequency in the source doc.
func (f *TermvectorsFilterSettings) MinTermFreq(value int64) *TermvectorsFilterSettings {
	f.minTermFreq = &value
	return f
}

// MaxTermFreq ignores words with more than this frequency in the source doc.
func (f *TermvectorsFilterSettings) MaxTermFreq(value int64) *TermvectorsFilterSettings {
	f.maxTermFreq = &value
	return f
}

// MinDocFreq ignores terms which do not occur in at least this many docs.
func (f *TermvectorsFilterSettings) MinDocFreq(value int64) *TermvectorsFilterSettings {
	f.minDocFreq = &value
	return f
}

// MaxDocFreq ignores terms which occur in more than this many docs.
func (f *TermvectorsFilterSettings) MaxDocFreq(value int64) *TermvectorsFilterSettings {
	f.maxDocFreq = &value
	return f
}

// MinWordLength specifies the minimum word length below which words will be ignored.
func (f *TermvectorsFilterSettings) MinWordLength(value int64) *TermvectorsFilterSettings {
	f.minWordLength = &value
	return f
}

// MaxWordLength specifies the maximum word length above which words will be ignored.
func (f *TermvectorsFilterSettings) MaxWordLength(value int64) *TermvectorsFilterSettings {
	f.maxWordLength = &value
	return f
}

// Source returns the serializable form of the filter settings: a
// map[string]interface{} holding one int64 entry per assigned setting.
// The returned error is always nil.
func (f *TermvectorsFilterSettings) Source() (interface{}, error) {
	settings := []struct {
		key   string
		value *int64
	}{
		{"max_num_terms", f.maxNumTerms},
		{"min_term_freq", f.minTermFreq},
		{"max_term_freq", f.maxTermFreq},
		{"min_doc_freq", f.minDocFreq},
		{"max_doc_freq", f.maxDocFreq},
		{"min_word_length", f.minWordLength},
		{"max_word_length", f.maxWordLength},
	}

	out := make(map[string]interface{})
	for _, setting := range settings {
		if setting.value != nil {
			out[setting.key] = *setting.value
		}
	}
	return out, nil
}
- // -- Response types --
// TokenInfo is one entry of the "tokens" array of a term in a term vectors
// response (see TermsInfo.Tokens).
type TokenInfo struct {
	// StartOffset is decoded from "start_offset".
	StartOffset int64 `json:"start_offset"`
	// EndOffset is decoded from "end_offset".
	EndOffset int64 `json:"end_offset"`
	// Position is decoded from "position".
	Position int64 `json:"position"`
	// Payload is decoded from "payload".
	Payload string `json:"payload"`
}
- type TermsInfo struct {
- DocFreq int64 `json:"doc_freq"`
- Score float64 `json:"score"`
- TermFreq int64 `json:"term_freq"`
- Ttf int64 `json:"ttf"`
- Tokens []TokenInfo `json:"tokens"`
- }
// FieldStatistics holds the "field_statistics" object of a field in a
// term vectors response.
type FieldStatistics struct {
	// DocCount is decoded from "doc_count".
	DocCount int64 `json:"doc_count"`
	// SumDocFreq is decoded from "sum_doc_freq".
	SumDocFreq int64 `json:"sum_doc_freq"`
	// SumTtf is decoded from "sum_ttf".
	SumTtf int64 `json:"sum_ttf"`
}
- type TermVectorsFieldInfo struct {
- FieldStatistics FieldStatistics `json:"field_statistics"`
- Terms map[string]TermsInfo `json:"terms"`
- }
- // TermvectorsResponse is the response of TermvectorsService.Do.
- type TermvectorsResponse struct {
- Index string `json:"_index"`
- Type string `json:"_type"`
- Id string `json:"_id,omitempty"`
- Version int `json:"_version"`
- Found bool `json:"found"`
- Took int64 `json:"took"`
- TermVectors map[string]TermVectorsFieldInfo `json:"term_vectors"`
- }
|