Created
May 5, 2026 16:32
-
-
Save simonhearne/665dfdfc3cb6eca12353c08008a9ff41 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### milvus tally operator
### finds unique field values, returns the count of entities for each
#
# Streams the collection through a query iterator so that only one batch of
# rows is held in memory at a time, and accumulates per-value counts in a
# Counter.

from collections import Counter

from pymilvus import MilvusClient

### variables to set
collection_name = "embeddings"  # match the collection name
field = "field_to_tally"  # match the field to count

client = MilvusClient(uri="cluster-url", token="read-only-token")
client.load_collection(collection_name)

counts = Counter()
iterator = client.query_iterator(
    collection_name=collection_name,
    # NOTE(review): comparing against '' presumably requires a string-typed
    # field -- confirm against the collection schema.
    filter=f"{field} != ''",  # scalar filter — no vector needed
    output_fields=[field],  # only pull the field we care about
    batch_size=1000,
)

# Drain the iterator batch by batch; a falsy batch signals the end of data.
# The finally clause guarantees the iterator is closed even when next() or
# the row lookup raises part-way through the scan.
try:
    while True:
        batch = iterator.next()
        if not batch:
            break
        counts.update(row[field] for row in batch)
finally:
    iterator.close()

print(f"\nDistinct keys: {len(counts)}")
# The total covers only rows matched by the filter above, i.e. entities whose
# field value is not the empty string.
print(f"Total entities: {sum(counts.values())}")

# Top 10 distinct keys by count
print("\nTop ten distinct keys:")
for key, n in counts.most_common(10):
    print(f" - {key}: {n}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment