Skip to content

Instantly share code, notes, and snippets.

@tspannhw
Created August 17, 2024 20:41
Show Gist options
  • Select an option

  • Save tspannhw/6cdfa5481786163bb5fb9cf166575a5c to your computer and use it in GitHub Desktop.

Select an option

Save tspannhw/6cdfa5481786163bb5fb9cf166575a5c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "505a6c66-679b-4d4a-ad58-1e711353d67e",
"metadata": {},
"source": [
"## 12-Aug-2024 == Air Quality\n",
"\n",
"#### Tim Spann @PaaSDev\n",
"\n",
"### Milvus - Attu\n",
"\n",
"![milvuslogo](https://milvus.io/images/milvus_logo.svg)\n",
"\n",
"\n",
"### CODE + COMMUNITY\n",
"\n",
"Please join my meetup group NJ/NYC/Philly/Virtual.\n",
"\n",
"[https://www.meetup.com/unstructured-data-meetup-new-york/](https://www.meetup.com/unstructured-data-meetup-new-york/)\n",
"\n",
"\n",
"#### Contact Us\n",
"\n",
"Get Milvused! [https://milvus.io/](https://milvus.io/)\n",
"\n",
"Read my Newsletter every week! [https://github.com/tspannhw/FLiPStackWeekly/blob/main/142-17June2024.md](https://github.com/tspannhw/FLiPStackWeekly/blob/main/142-17June2024.md)\n",
"\n",
"For more cool Unstructured Data, AI and Vector Database videos, check out the Milvus vector database videos here:\n",
"[https://www.youtube.com/@MilvusVectorDatabase/videos](https://www.youtube.com/@MilvusVectorDatabase/videos)\n",
"\n",
"#### Unstructured Data Meetups\n",
"\n",
"[https://www.meetup.com/pro/unstructureddata/](https://www.meetup.com/pro/unstructureddata/)\n",
"\n",
"[https://zilliz.com/community/unstructured-data-meetup](https://zilliz.com/community/unstructured-data-meetup)\n",
"\n",
"[https://zilliz.com/event](https://zilliz.com/event)\n",
"\n",
"#### [Twitter/X](https://x.com/milvusio)\n",
"\n",
"#### [LinkedIn](https://www.linkedin.com/company/zilliz/)\n",
"\n",
"#### [Discord](https://discord.com/invite/FjCMmaJng6)\n",
"\n",
"#### [Blog](https://milvusio.medium.com/)\n",
"\n",
"#### Please star our [GitHub](https://github.com/milvus-io/milvus)\n",
"\n",
"#### [YouTube](https://www.youtube.com/@FLaNK-Stack)\n",
"\n",
"#### [Blog](https://medium.com/@tspann/subscribe)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cd3c47f8-bd73-4d93-bd7c-4113428213cd",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pymilvus import MilvusClient\n",
"from dotenv import load_dotenv\n",
"\n",
"# Pull settings from a local .env file (when one exists) into the process\n",
"# environment so secrets do not have to live in the notebook itself.\n",
"load_dotenv(verbose=True)\n",
"\n",
"# Size of the vector field created in the collection schema.\n",
"DIMENSION = 384\n",
"# Milvus endpoint; set MILVUS_URL in the environment/.env to override the default.\n",
"MILVUS_URL = os.getenv(\"MILVUS_URL\", \"http://192.168.1.1:19530\")\n",
"COLLECTION_NAME = \"airquality\"\n",
"# AirNow current-observation endpoint; the ZIP code is appended directly after \"zipCode=\".\n",
"AQ_URL = \"https://www.airnowapi.org/aq/observation/zipCode/current/?format=application/json&distance=5000&zipCode=\"\n",
"AQ_KEY = \"&API_KEY=\"\n",
"\n",
"# AirNow API key: read from AIRNOW_API_KEY (environment or .env).\n",
"# The literal fallback is only a placeholder, not a working credential.\n",
"API_KEY = os.getenv(\"AIRNOW_API_KEY\", \"cod3\")\n",
"\n",
"milvus_client = MilvusClient(uri=MILVUS_URL)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4478995a-a433-429b-ac69-bdcceb04e9e9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/timothyspann/Downloads/code/milvusvenv/lib/python3.12/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
" warnings.warn(\n"
]
}
],
"source": [
"import json\n",
"import requests\n",
"from pymilvus import model\n",
"from pymilvus.model.dense import SentenceTransformerEmbeddingFunction\n",
"\n",
"# Embedding function used to vectorize the air quality text, run on the CPU.\n",
"# NOTE: this assignment rebinds `model`, so the pymilvus `model` module imported\n",
"# above is no longer reachable under that name after this cell runs.\n",
"model = SentenceTransformerEmbeddingFunction(\n",
"    model_name='all-MiniLM-L6-v2',\n",
"    device='cpu',\n",
")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e543e60b-3ef9-4a3e-8ca5-80b5222cd2b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'state': <LoadState: Loaded>}\n"
]
}
],
"source": [
"from pymilvus import connections\n",
"from pymilvus import utility\n",
"from pymilvus import FieldSchema, CollectionSchema, DataType, Collection\n",
"import pprint\n",
"\n",
"## schema\n",
"\n",
"# Fixed schema: only the fields declared below are accepted.\n",
"schema = milvus_client.create_schema(enable_dynamic_field=False)\n",
"\n",
"# Scalar field supports:\n",
"#\n",
"# BOOL: Boolean (true or false)\n",
"# INT8: numpy.int8\n",
"# INT16: numpy.int16\n",
"# INT32: numpy.int32\n",
"# INT64: numpy.int64\n",
"# FLOAT: numpy.float32\n",
"# DOUBLE: numpy.double\n",
"# VARCHAR: VARCHAR\n",
"# JSON: JSON\n",
"\n",
"# Primary key is generated by Milvus (auto_id), so inserted rows omit 'id'.\n",
"schema.add_field(field_name='id', datatype=DataType.INT64, is_primary=True, auto_id=True)\n",
"\n",
"# Scalar columns copied from each observation.\n",
"schema.add_field(field_name='DateObserved', datatype=DataType.VARCHAR, max_length=12)\n",
"schema.add_field(field_name=\"HourObserved\", datatype=DataType.INT64)\n",
"schema.add_field(field_name=\"Latitude\", datatype=DataType.FLOAT)\n",
"schema.add_field(field_name=\"Longitude\", datatype=DataType.FLOAT)\n",
"schema.add_field(field_name='ParameterName', datatype=DataType.VARCHAR, max_length=16)\n",
"schema.add_field(field_name='ZipCode', datatype=DataType.VARCHAR, max_length=16)\n",
"schema.add_field(field_name=\"AQI\", datatype=DataType.FLOAT)\n",
"\n",
"# Embedding column plus the generated text columns stored next to it.\n",
"schema.add_field(field_name=\"vector\", datatype=DataType.FLOAT_VECTOR, dim=DIMENSION)\n",
"schema.add_field(field_name=\"details\", datatype=DataType.VARCHAR, max_length=8000)\n",
"schema.add_field(field_name=\"location\", datatype=DataType.VARCHAR, max_length=2000)\n",
"\n",
"## indexes\n",
"\n",
"index_params = milvus_client.prepare_index_params()\n",
"\n",
"# Sorted scalar index on the primary key.\n",
"index_params.add_index(field_name=\"id\", index_type=\"STL_SORT\")\n",
"\n",
"# IVF_FLAT vector index using L2 distance.\n",
"index_params.add_index(\n",
"    field_name=\"vector\",\n",
"    index_type=\"IVF_FLAT\",\n",
"    metric_type=\"L2\",\n",
"    params={\"nlist\": 100},\n",
")\n",
"\n",
"## collection\n",
"\n",
"# Create the collection only when it is missing, then report its load state.\n",
"if not milvus_client.has_collection(collection_name=COLLECTION_NAME):\n",
"    milvus_client.create_collection(\n",
"        collection_name=COLLECTION_NAME,\n",
"        schema=schema,\n",
"        index_params=index_params,\n",
"    )\n",
"\n",
"    res = milvus_client.get_load_state(collection_name=COLLECTION_NAME)\n",
"    print(res)\n",
"else:\n",
"    print(\"Exists.\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "748bd3d1-4bd6-462d-93d2-a977d9fde0cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"08520\n",
"08520\n",
"94027\n",
"94027\n",
"02199\n",
"02199\n",
"02199\n",
"11962\n",
"94957\n",
"94957\n",
"90402\n",
"94301\n",
"94301\n",
"07070\n",
"07070\n",
"90265\n",
"90265\n",
"90272\n",
"10013\n",
"10013\n",
"10007\n",
"10007\n",
"94123\n",
"77449\n",
"77449\n",
"11368\n",
"11368\n",
"60629\n",
"60629\n",
"79936\n",
"79936\n",
"79936\n",
"75034\n",
"75034\n"
]
}
],
"source": [
"import numpy\n",
"\n",
"# ZIP codes to query for current air quality observations.\n",
"zipcodes = [\"08520\",\"94027\",\"02199\",\"11962\",\"94957\",\"90402\",\"94301\",\"07070\",\"90265\",\"90272\",\"10013\",\"10007\",\"94123\",\"77449\",\"11368\",\"60629\",\"79936\",\"75034\"]\n",
"# Rows to insert into Milvus; also read by the search cell below.\n",
"data = []\n",
"\n",
"for val in zipcodes:\n",
"    # AQ_URL ends with \"zipCode=\", so the ZIP code and API key are appended directly.\n",
"    # The timeout keeps a stalled request from hanging the notebook, and\n",
"    # raise_for_status surfaces HTTP errors instead of a JSON decoding failure.\n",
"    response = requests.get(AQ_URL + val + AQ_KEY + API_KEY, timeout=30)\n",
"    response.raise_for_status()\n",
"    airqualities = response.json()\n",
"\n",
"    # The loop below expects a list of observation dicts; skip any other payload\n",
"    # (for example an error object) rather than failing on .get().\n",
"    if not isinstance(airqualities, list):\n",
"        print(f\"Unexpected response for {val}: {airqualities}\")\n",
"        continue\n",
"\n",
"    for jsonitems in airqualities:\n",
"        print(val)\n",
"        # Human-readable text built from the observation; 'details' is what gets embedded.\n",
"        location = 'Location {0} {1} @ {2},{3}'.format(jsonitems.get('ReportingArea','North'), jsonitems.get('StateCode','NY'), jsonitems.get('Latitude',0),jsonitems.get('Longitude',0) )\n",
"        details = 'Current Air Quality Reading for {0} is {1} for {2} at Hour {3} on {4}.'.format( jsonitems.get('ParameterName','O3'),jsonitems.get('AQI',0), location, jsonitems.get('HourObserved',1),jsonitems.get('DateObserved','2024-01-01'))\n",
"\n",
"        data.append({\n",
"            \"DateObserved\": jsonitems.get('DateObserved',''),\n",
"            \"HourObserved\": jsonitems.get('HourObserved',0),\n",
"            \"Latitude\": jsonitems.get('Latitude',0),\n",
"            \"Longitude\": jsonitems.get('Longitude',0),\n",
"            \"ParameterName\": jsonitems.get('ParameterName',''),\n",
"            \"ZipCode\": str(val),\n",
"            \"AQI\": jsonitems.get('AQI',0.0),\n",
"            # The embedding function takes a list of texts; take the single resulting vector.\n",
"            \"vector\": model([details])[0],\n",
"            \"details\": details,\n",
"            \"location\": location,\n",
"        })\n",
"\n",
"# Only call insert when at least one observation was collected.\n",
"if data:\n",
"    res = milvus_client.insert(collection_name=COLLECTION_NAME, data=data)\n",
"else:\n",
"    res = None\n",
"    print(\"No air quality observations were retrieved; nothing inserted.\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e7e50c7d-0740-4596-bcbc-e3439bfd4e4a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hit: {'id': 451730705222766231, 'distance': 0.0, 'entity': {'HourObserved': 15, 'Longitude': -74.32499694824219, 'ParameterName': 'O3', 'AQI': 32.0, 'details': 'Current Air Quality Reading for O3 is 32 for Location Central NJ @ 40.401,-74.325 at Hour 15 on 2024-08-17.', 'id': 451730705222766231, 'Latitude': 40.4010009765625, 'ZipCode': '08520', 'location': 'Location Central NJ @ 40.401,-74.325', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766244, 'distance': 0.05379501357674599, 'entity': {'HourObserved': 15, 'Longitude': -74.18699645996094, 'ParameterName': 'O3', 'AQI': 46.0, 'details': 'Current Air Quality Reading for O3 is 46 for Location Northeast Urban NJ @ 40.692,-74.187 at Hour 15 on 2024-08-17.', 'id': 451730705222766244, 'Latitude': 40.69200134277344, 'ZipCode': '07070', 'location': 'Location Northeast Urban NJ @ 40.692,-74.187', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766249, 'distance': 0.05379501357674599, 'entity': {'HourObserved': 15, 'Longitude': -74.18699645996094, 'ParameterName': 'O3', 'AQI': 46.0, 'details': 'Current Air Quality Reading for O3 is 46 for Location Northeast Urban NJ @ 40.692,-74.187 at Hour 15 on 2024-08-17.', 'id': 451730705222766249, 'Latitude': 40.69200134277344, 'ZipCode': '10013', 'location': 'Location Northeast Urban NJ @ 40.692,-74.187', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766251, 'distance': 0.05379501357674599, 'entity': {'HourObserved': 15, 'Longitude': -74.18699645996094, 'ParameterName': 'O3', 'AQI': 46.0, 'details': 'Current Air Quality Reading for O3 is 46 for Location Northeast Urban NJ @ 40.692,-74.187 at Hour 15 on 2024-08-17.', 'id': 451730705222766251, 'Latitude': 40.69200134277344, 'ZipCode': '10007', 'location': 'Location Northeast Urban NJ @ 40.692,-74.187', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766238, 'distance': 0.1820998340845108, 'entity': {'HourObserved': 15, 'Longitude': -72.55059814453125, 'ParameterName': 'O3', 'AQI': 40.0, 'details': 'Current Air Quality Reading for O3 is 40 for Location Madison CT @ 41.2583,-72.5506 at Hour 15 on 2024-08-17.', 'id': 451730705222766238, 'Latitude': 41.25830078125, 'ZipCode': '11962', 'location': 'Location Madison CT @ 41.2583,-72.5506', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766256, 'distance': 0.24976064264774323, 'entity': {'HourObserved': 15, 'Longitude': -73.83589935302734, 'ParameterName': 'O3', 'AQI': 46.0, 'details': 'Current Air Quality Reading for O3 is 46 for Location New York City Region NY @ 40.8419,-73.8359 at Hour 15 on 2024-08-17.', 'id': 451730705222766256, 'Latitude': 40.84189987182617, 'ZipCode': '11368', 'location': 'Location New York City Region NY @ 40.8419,-73.8359', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766235, 'distance': 0.25786182284355164, 'entity': {'HourObserved': 15, 'Longitude': -71.0510025024414, 'ParameterName': 'O3', 'AQI': 28.0, 'details': 'Current Air Quality Reading for O3 is 28 for Location Boston MA @ 42.351,-71.051 at Hour 15 on 2024-08-17.', 'id': 451730705222766235, 'Latitude': 42.35100173950195, 'ZipCode': '02199', 'location': 'Location Boston MA @ 42.351,-71.051', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766241, 'distance': 0.265556663274765, 'entity': {'HourObserved': 12, 'Longitude': -118.45659637451172, 'ParameterName': 'O3', 'AQI': 35.0, 'details': 'Current Air Quality Reading for O3 is 35 for Location NW Coastal LA CA @ 34.0505,-118.4566 at Hour 12 on 2024-08-17.', 'id': 451730705222766241, 'Latitude': 34.050498962402344, 'ZipCode': '90402', 'location': 'Location NW Coastal LA CA @ 34.0505,-118.4566', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766248, 'distance': 0.265556663274765, 'entity': {'HourObserved': 12, 'Longitude': -118.45659637451172, 'ParameterName': 'O3', 'AQI': 35.0, 'details': 'Current Air Quality Reading for O3 is 35 for Location NW Coastal LA CA @ 34.0505,-118.4566 at Hour 12 on 2024-08-17.', 'id': 451730705222766248, 'Latitude': 34.050498962402344, 'ZipCode': '90272', 'location': 'Location NW Coastal LA CA @ 34.0505,-118.4566', 'DateObserved': '2024-08-17'}}\n",
"Hit: {'id': 451730705222766253, 'distance': 0.2820756137371063, 'entity': {'HourObserved': 12, 'Longitude': -122.43000030517578, 'ParameterName': 'O3', 'AQI': 12.0, 'details': 'Current Air Quality Reading for O3 is 12 for Location San Francisco CA @ 37.75,-122.43 at Hour 12 on 2024-08-17.', 'id': 451730705222766253, 'Latitude': 37.75, 'ZipCode': '94123', 'location': 'Location San Francisco CA @ 37.75,-122.43', 'DateObserved': '2024-08-17'}}\n"
]
}
],
"source": [
"\n",
"\n",
"# Define search parameters\n",
"search_params = {\n",
" \"metric_type\": \"L2\",\n",
" \"params\": {\"nprobe\": 10}\n",
"}\n",
"\n",
"# Use first record as search record\n",
"query_vector = [data[0][\"vector\"]]\n",
"\n",
"# Execute the search on the 'vector' field\n",
"search_results = milvus_client.search(\n",
" COLLECTION_NAME,\n",
" data=query_vector,\n",
" anns_field=\"vector\",\n",
" output_fields=[\"id\", \"DateObserved\", \"HourObserved\", \"Latitude\", \"Longitude\", \"ParameterName\", \"ZipCode\", \"AQI\", \"details\", \"location\"],\n",
" search_params=search_params,\n",
" limit=10\n",
")\n",
"\n",
"# Print search results\n",
"for hits in search_results:\n",
" for hit in hits:\n",
" print(f\"Hit: {hit}\")\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment