#
Transform aggregations
When using buckets aggregation and/or pipeline aggregation, the path to the expected values can get complicated and become subject to unexpected changes for a public API.
ArticleSearch.aggregate(popular_tags_since: 1.month.ago).aggregations.popular_tags_since.published.buckets.pluck(:key)
=> ["Blog", "Tech", …]
Instead, you can define transformations to provide simpler access to aggregated data:
class ArticleSearch < Caoutsearch::Search::Base
has_aggregation :popular_tags_since do |since|
# …
end
transform_aggregation :popular_tags_since do |aggs|
aggs.dig(:popular_tags_since, :published, :buckets).pluck(:key)
end
end
ArticleSearch.aggregate(popular_tags_since: 1.month.ago).aggregations.popular_tags_since
=> ["Blog", "Tech", …]
You can also use transformations to combine multiple aggregations:
class ArticleSearch < Caoutsearch::Search::Base
has_aggregation :blog_count, { filter: { term: { category: "blog" } } }
has_aggregation :archives_count, { filter: { term: { archived: true } } }
transform_aggregation :stats, from: %i[blog_count archives_count] do |aggs|
{
blog_count: aggs.dig(:blog_count, :doc_count),
archives_count: aggs.dig(:archives, :doc_count)
}
end
end
ArticleSearch.aggregate(:stats).aggregations.stats
# ArticleSearch Search { "body": { "aggs": { "blog_count": {…}, "archives_count": {…}}}}
# ArticleSearch Search (10ms / took 5ms)
=> { blog_count: 124, archives_count: 2452 }
This is also usefull to unify the API between different search engines:
class ArticleSearch < Caoutsearch::Search::Base
has_aggregation :popular_tags, {
filter: { term: { published: true } },
aggs: { published: { terms: { field: :tags, size: 10 } } }
}
transform_aggregation :popular_tags do |aggs|
aggs.dig(:popular_tags, :published, :buckets).pluck(:key)
end
end
class TagSearch < Caoutsearch::Search::Base
has_aggregation :popular_tags, {
terms: { field: "label", size: 20, order: { used_count: "desc" } }
}
transform_aggregation :popular_tags do |aggs|
aggs.dig(:popular_tags, :buckets).pluck(:key)
end
end
ArticleSearch.aggregate(:popular_tags).aggregations.popular_tags
=> ["Blog", "Tech", …]
TagSearch.aggregate(:popular_tags).aggregations.popular_tags
=> ["Tech", "Blog", …]
Transformations are performed on demand and result is memorized. That means:
- the result of transformation is not visible in the Response::Aggregations output.
- the block is called only once for the same search instance.
class ArticleSearch < Caoutsearch::Search::Base
has_aggregation :popular_tags, {…}
transform_aggregation :popular_tags do |aggs|
tags = aggs.dig(:popular_tags, :published, :buckets).pluck(:key)
authorized = Tag.where(title: tags, authorize: true).pluck(:title)
tags & authorized
end
end
article_search = ArticleSearch.aggregate(:popular_tags)
=> #<ArticleSearch current_aggregations: [:popular_tags]>
article_search.aggregations
# ArticleSearch Search (10ms / took 5ms)
=> #<Caoutsearch::Response::Aggregations popular_tags=#<Caoutsearch::Response::Response doc_count=100 …
article_search.aggregations.popular_tags
# (10.2ms) SELECT "tags"."title" FROM "tags" WHERE "tags"."title" IN …
=> ["Blog", "Tech", …]
article_search.aggregations.popular_tags
=> ["Blog", "Tech", …]
article_search.search("Tech").aggregations.popular_tags
# ArticleSearch Search (10ms / took 5ms)
# (10.2ms) SELECT "tags"."title" FROM "tags" WHERE "tags"."title" IN …
=> ["Blog", "Tech", …]
Be careful to avoid using aggregations.<aggregation_name>
inside a transformation block: it can lead to an infinite recursion.
class ArticleSearch < Caoutsearch::Search::Base
transform_aggregation :popular_tags do
aggregations.popular_tags.buckets.pluck("key")
end
end
ArticleSearch.aggregate(:popular_tags).aggregations.popular_tags
Traceback (most recent call last):
4: from app/searches/article_search.rb:3:in `block in <class:ArticleSearch>'
3: from app/searches/article_search.rb:3:in `block in <class:ArticleSearch>'
2: from app/searches/article_search.rb:3:in `block in <class:ArticleSearch>'
1: from app/searches/article_search.rb:3:in `block in <class:ArticleSearch>'
SystemStackError (stack level too deep)
Instead, use the argument passed to the block: it's is a shortcut for response.aggregations
which is a Response::Reponse and not a Response::Aggregations.
class ArticleSearch < Caoutsearch::Search::Base
transform_aggregation :popular_tags do |aggs|
aggs.popular_tags.buckets.pluck("key")
end
end
ArticleSearch.aggregate(:popular_tags).aggregations.popular_tags
=> ["Blog", "Tech", …]
One last helpful argument is track_total_hits
which allows to perform calculations over aggregations using the total_count
method without sending a second request.
Take a look at Total count to understand why a second request could be performed.
class ArticleSearch < Caoutsearch::Search::Base
aggregation :tagged, filter: { exist: "tag" }
transform_aggregation :tagged_rate, from: :tagged, track_total_hits: true do |aggs|
count = aggs.dig(:tagged, :doc_count)
count.to_f / total_count
end
transform_aggregation :tagged_rate_without_track_total_hits, from: :tagged do |aggs|
count = aggs.dig(:tagged, :doc_count)
count.to_f / total_count
end
end
ArticleSearch.aggregate(:tagged_rate).aggregations.tagged_rate
# ArticleSearch Search { "body": { "track_total_hits": true, "aggs": { "blog_count": {…}, "archives_count": {…}}}}
# ArticleSearch Search (10ms / took 5ms)
=> 0.95
ArticleSearch.aggregate(:tagged_rate_without_track_total_hits).aggregations.tagged_rate
# ArticleSearch Search { "body": { "aggs": { "blog_count": {…}, "archives_count": {…}}}}
# ArticleSearch Search (10ms / took 5ms)
# ArticleSearch Search { "body": { "track_total_hits": true, "aggs": { "blog_count": {…}, "archives_count":
# ArticleSearch Search (10ms / took 5ms)
=> 0.95