Created
August 17, 2024 20:41
-
-
Save tspannhw/6cdfa5481786163bb5fb9cf166575a5c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "505a6c66-679b-4d4a-ad58-1e711353d67e", | |
| "metadata": {}, | |
| "source": [ | |
| "## 12-Aug-2024 == Air Quality \n", | |
| "\n", | |
| "#### Tim Spann @PaaSDev\n", | |
| "\n", | |
| "### Milvus - Attu\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "### CODE + COMMUNITY\n", | |
| "\n", | |
| "Please join my meetup group NJ/NYC/Philly/Virtual. \n", | |
| "\n", | |
| "[https://www.meetup.com/unstructured-data-meetup-new-york/](https://www.meetup.com/unstructured-data-meetup-new-york/)\n", | |
| "\n", | |
| "\n", | |
| "#### Contact Us\n", | |
| "\n", | |
| "Get Milvused! [https://milvus.io/](https://milvus.io/)\n", | |
| "\n", | |
| "Read my Newsletter every week! [https://github.com/tspannhw/FLiPStackWeekly/blob/main/142-17June2024.md](https://github.com/tspannhw/FLiPStackWeekly/blob/main/142-17June2024.md)\n", | |
| "\n", | |
| "For more cool Unstructured Data, AI and Vector Database videos check out the Milvus vector database videos here\n", | |
| "[https://www.youtube.com/@MilvusVectorDatabase/videos](https://www.youtube.com/@MilvusVectorDatabase/videos)\n", | |
| "\n", | |
| "#### Unstructured Data Meetups \n", | |
| "\n", | |
| "[https://www.meetup.com/pro/unstructureddata/](https://www.meetup.com/pro/unstructureddata/)\n", | |
| "[https://zilliz.com/community/unstructured-data-meetup](https://zilliz.com/community/unstructured-data-meetup)\n", | |
| "[https://zilliz.com/event](https://zilliz.com/event)\n", | |
| "\n", | |
| "#### [Twitter/X](https://x.com/milvusio) \n", | |
| "\n", | |
| "#### [LinkedIn](https://www.linkedin.com/company/zilliz/)\n", | |
| "\n", | |
| "#### [Discord](https://discord.com/invite/FjCMmaJng6)\n", | |
| "\n", | |
| "#### [Blog](https://milvusio.medium.com/)\n", | |
| "\n", | |
| "#### Please star our [GitHub](https://github.com/milvus-io/milvus)\n", | |
| "\n", | |
| "#### [YouTube](https://www.youtube.com/@FLaNK-Stack)\n", | |
| "\n", | |
| "#### [Blog](https://medium.com/@tspann/subscribe)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "cd3c47f8-bd73-4d93-bd7c-4113428213cd", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import os\n", | |
| "from pymilvus import MilvusClient\n", | |
| "from dotenv import load_dotenv\n", | |
| "load_dotenv(verbose=True)\n", | |
| "\n", | |
| "DIMENSION = 384 \n", | |
| "MILVUS_URL = \"http://192.168.1.1:19530\" \n", | |
| "COLLECTION_NAME = \"airquality\"\n", | |
| "AQ_URL = \"https://www.airnowapi.org/aq/observation/zipCode/current/?format=application/json&distance=5000&zipCode=\"\n", | |
| "AQ_KEY = \"&API_KEY=\"\n", | |
| "\n", | |
| "## do do.env thing\n", | |
| "API_KEY = \"cod3\"\n", | |
| "\n", | |
| "milvus_client = MilvusClient( uri=MILVUS_URL )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "4478995a-a433-429b-ac69-bdcceb04e9e9", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/Users/timothyspann/Downloads/code/milvusvenv/lib/python3.12/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", | |
| " warnings.warn(\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import json\n", | |
| "import requests\n", | |
| "from pymilvus import model\n", | |
| "from pymilvus.model.dense import SentenceTransformerEmbeddingFunction\n", | |
| "\n", | |
| "model = SentenceTransformerEmbeddingFunction('all-MiniLM-L6-v2',device='cpu' )\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "e543e60b-3ef9-4a3e-8ca5-80b5222cd2b7", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "{'state': <LoadState: Loaded>}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "from pymilvus import connections\n", | |
| "from pymilvus import utility\n", | |
| "from pymilvus import FieldSchema, CollectionSchema, DataType, Collection\n", | |
| "import pprint\n", | |
| "\n", | |
| "## schema\n", | |
| "\n", | |
| "schema = milvus_client.create_schema(\n", | |
| " enable_dynamic_field=False\n", | |
| ")\n", | |
| "\n", | |
| "# Scalar field supports:\n", | |
| "#\n", | |
| "# BOOL: Boolean (true or false)\n", | |
| "# INT8: numpy.int8\n", | |
| "# INT16: numpy.int16\n", | |
| "# INT32: numpy.int32\n", | |
| "# INT64: numpy.int64\n", | |
| "# FLOAT: numpy.float32\n", | |
| "# DOUBLE: numpy.double\n", | |
| "# VARCHAR: VARCHAR\n", | |
| "# JSON: JSON\n", | |
| "\n", | |
| "schema.add_field(field_name='id', datatype=DataType.INT64, is_primary=True, auto_id=True)\n", | |
| "schema.add_field(field_name='DateObserved', datatype=DataType.VARCHAR, max_length=12)\n", | |
| "schema.add_field(field_name=\"HourObserved\", datatype=DataType.INT64)\n", | |
| "schema.add_field(field_name=\"Latitude\", datatype=DataType.FLOAT)\n", | |
| "schema.add_field(field_name=\"Longitude\", datatype=DataType.FLOAT)\n", | |
| "schema.add_field(field_name='ParameterName', datatype=DataType.VARCHAR, max_length=16)\n", | |
| "schema.add_field(field_name='ZipCode', datatype=DataType.VARCHAR, max_length=16)\n", | |
| "schema.add_field(field_name=\"AQI\", datatype=DataType.FLOAT)\n", | |
| "schema.add_field(field_name=\"vector\", datatype=DataType.FLOAT_VECTOR, dim=DIMENSION)\n", | |
| "schema.add_field(field_name=\"details\", datatype=DataType.VARCHAR, max_length=8000)\n", | |
| "schema.add_field(field_name=\"location\", datatype=DataType.VARCHAR, max_length=2000)\n", | |
| "\n", | |
| "index_params = milvus_client.prepare_index_params()\n", | |
| "\n", | |
| "index_params.add_index(\n", | |
| " field_name=\"id\",\n", | |
| " index_type=\"STL_SORT\"\n", | |
| ")\n", | |
| "\n", | |
| "index_params.add_index(\n", | |
| " field_name=\"vector\",\n", | |
| " index_type=\"IVF_FLAT\",\n", | |
| " metric_type=\"L2\",\n", | |
| " params={\"nlist\": 100}\n", | |
| ")\n", | |
| "\n", | |
| "if milvus_client.has_collection(collection_name=COLLECTION_NAME):\n", | |
| " print(\"Exists.\")\n", | |
| "else:\n", | |
| " milvus_client.create_collection(\n", | |
| " collection_name = COLLECTION_NAME,\n", | |
| " schema=schema,\n", | |
| " index_params=index_params\n", | |
| " )\n", | |
| " \n", | |
| " res = milvus_client.get_load_state(\n", | |
| " collection_name = COLLECTION_NAME\n", | |
| " )\n", | |
| " print(res)\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "748bd3d1-4bd6-462d-93d2-a977d9fde0cb", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "08520\n", | |
| "08520\n", | |
| "94027\n", | |
| "94027\n", | |
| "02199\n", | |
| "02199\n", | |
| "02199\n", | |
| "11962\n", | |
| "94957\n", | |
| "94957\n", | |
| "90402\n", | |
| "94301\n", | |
| "94301\n", | |
| "07070\n", | |
| "07070\n", | |
| "90265\n", | |
| "90265\n", | |
| "90272\n", | |
| "10013\n", | |
| "10013\n", | |
| "10007\n", | |
| "10007\n", | |
| "94123\n", | |
| "77449\n", | |
| "77449\n", | |
| "11368\n", | |
| "11368\n", | |
| "60629\n", | |
| "60629\n", | |
| "79936\n", | |
| "79936\n", | |
| "79936\n", | |
| "75034\n", | |
| "75034\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import numpy\n", | |
| "\n", | |
| "zipcodes = [\"08520\",\"94027\",\"02199\",\"11962\",\"94957\",\"90402\",\"94301\",\"07070\",\"90265\",\"90272\",\"10013\",\"10007\",\"94123\",\"77449\",\"11368\",\"60629\",\"79936\",\"75034\"]\n", | |
| "data = []\n", | |
| "\n", | |
| "for i, val in enumerate(zipcodes):\n", | |
| " response = requests.get(str(AQ_URL + val + AQ_KEY + API_KEY )).content\n", | |
| " airqualities = json.loads(response)\n", | |
| " \n", | |
| " details = \"\"\n", | |
| " location = \"\"\n", | |
| " \n", | |
| " for jsonitems in airqualities:\n", | |
| " print(val)\n", | |
| " location = 'Location {0} {1} @ {2},{3}'.format(jsonitems.get('ReportingArea','North'), jsonitems.get('StateCode','NY'), jsonitems.get('Latitude',0),jsonitems.get('Longitude',0) )\n", | |
| " details = 'Current Air Quality Reading for {0} is {1} for {2} at Hour {3} on {4}.'.format( jsonitems.get('ParameterName','O3'),jsonitems.get('AQI',0), location, jsonitems.get('HourObserved',1),jsonitems.get('DateObserved','2024-01-01'))\n", | |
| " \n", | |
| " data.append({\"DateObserved\": jsonitems.get('DateObserved',''), \n", | |
| " \"HourObserved\": jsonitems.get('HourObserved',0), \n", | |
| " \"Latitude\": jsonitems.get('Latitude',0), \n", | |
| " \"Longitude\": jsonitems.get('Longitude',0), \n", | |
| " \"ParameterName\": jsonitems.get('ParameterName',''), \n", | |
| " \"ZipCode\": str(val),\n", | |
| " \"AQI\": jsonitems.get('AQI',0.0), \n", | |
| " \"vector\": model(details), \"details\": details, \"location\": location})\n", | |
| " \n", | |
| "\n", | |
| "res = milvus_client.insert(collection_name=COLLECTION_NAME, data=data)\n", | |
| "# print(res)\n", | |
| "# print(data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "e7e50c7d-0740-4596-bcbc-e3439bfd4e4a", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Hit: {'id': 451730705222766231, 'distance': 0.0, 'entity': {'HourObserved': 15, 'Longitude': -74.32499694824219, 'ParameterName': 'O3', 'AQI': 32.0, 'details': 'Current Air Quality Reading for O3 is 32 for Location Central NJ @ 40.401,-74.325 at Hour 15 on 2024-08-17.', 'id': 451730705222766231, 'Latitude': 40.4010009765625, 'ZipCode': '08520', 'location': 'Location Central NJ @ 40.401,-74.325', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766244, 'distance': 0.05379501357674599, 'entity': {'HourObserved': 15, 'Longitude': -74.18699645996094, 'ParameterName': 'O3', 'AQI': 46.0, 'details': 'Current Air Quality Reading for O3 is 46 for Location Northeast Urban NJ @ 40.692,-74.187 at Hour 15 on 2024-08-17.', 'id': 451730705222766244, 'Latitude': 40.69200134277344, 'ZipCode': '07070', 'location': 'Location Northeast Urban NJ @ 40.692,-74.187', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766249, 'distance': 0.05379501357674599, 'entity': {'HourObserved': 15, 'Longitude': -74.18699645996094, 'ParameterName': 'O3', 'AQI': 46.0, 'details': 'Current Air Quality Reading for O3 is 46 for Location Northeast Urban NJ @ 40.692,-74.187 at Hour 15 on 2024-08-17.', 'id': 451730705222766249, 'Latitude': 40.69200134277344, 'ZipCode': '10013', 'location': 'Location Northeast Urban NJ @ 40.692,-74.187', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766251, 'distance': 0.05379501357674599, 'entity': {'HourObserved': 15, 'Longitude': -74.18699645996094, 'ParameterName': 'O3', 'AQI': 46.0, 'details': 'Current Air Quality Reading for O3 is 46 for Location Northeast Urban NJ @ 40.692,-74.187 at Hour 15 on 2024-08-17.', 'id': 451730705222766251, 'Latitude': 40.69200134277344, 'ZipCode': '10007', 'location': 'Location Northeast Urban NJ @ 40.692,-74.187', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766238, 'distance': 0.1820998340845108, 'entity': {'HourObserved': 15, 'Longitude': -72.55059814453125, 'ParameterName': 'O3', 'AQI': 40.0, 'details': 'Current Air Quality Reading for O3 is 40 for Location Madison CT @ 41.2583,-72.5506 at Hour 15 on 2024-08-17.', 'id': 451730705222766238, 'Latitude': 41.25830078125, 'ZipCode': '11962', 'location': 'Location Madison CT @ 41.2583,-72.5506', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766256, 'distance': 0.24976064264774323, 'entity': {'HourObserved': 15, 'Longitude': -73.83589935302734, 'ParameterName': 'O3', 'AQI': 46.0, 'details': 'Current Air Quality Reading for O3 is 46 for Location New York City Region NY @ 40.8419,-73.8359 at Hour 15 on 2024-08-17.', 'id': 451730705222766256, 'Latitude': 40.84189987182617, 'ZipCode': '11368', 'location': 'Location New York City Region NY @ 40.8419,-73.8359', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766235, 'distance': 0.25786182284355164, 'entity': {'HourObserved': 15, 'Longitude': -71.0510025024414, 'ParameterName': 'O3', 'AQI': 28.0, 'details': 'Current Air Quality Reading for O3 is 28 for Location Boston MA @ 42.351,-71.051 at Hour 15 on 2024-08-17.', 'id': 451730705222766235, 'Latitude': 42.35100173950195, 'ZipCode': '02199', 'location': 'Location Boston MA @ 42.351,-71.051', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766241, 'distance': 0.265556663274765, 'entity': {'HourObserved': 12, 'Longitude': -118.45659637451172, 'ParameterName': 'O3', 'AQI': 35.0, 'details': 'Current Air Quality Reading for O3 is 35 for Location NW Coastal LA CA @ 34.0505,-118.4566 at Hour 12 on 2024-08-17.', 'id': 451730705222766241, 'Latitude': 34.050498962402344, 'ZipCode': '90402', 'location': 'Location NW Coastal LA CA @ 34.0505,-118.4566', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766248, 'distance': 0.265556663274765, 'entity': {'HourObserved': 12, 'Longitude': -118.45659637451172, 'ParameterName': 'O3', 'AQI': 35.0, 'details': 'Current Air Quality Reading for O3 is 35 for Location NW Coastal LA CA @ 34.0505,-118.4566 at Hour 12 on 2024-08-17.', 'id': 451730705222766248, 'Latitude': 34.050498962402344, 'ZipCode': '90272', 'location': 'Location NW Coastal LA CA @ 34.0505,-118.4566', 'DateObserved': '2024-08-17'}}\n", | |
| "Hit: {'id': 451730705222766253, 'distance': 0.2820756137371063, 'entity': {'HourObserved': 12, 'Longitude': -122.43000030517578, 'ParameterName': 'O3', 'AQI': 12.0, 'details': 'Current Air Quality Reading for O3 is 12 for Location San Francisco CA @ 37.75,-122.43 at Hour 12 on 2024-08-17.', 'id': 451730705222766253, 'Latitude': 37.75, 'ZipCode': '94123', 'location': 'Location San Francisco CA @ 37.75,-122.43', 'DateObserved': '2024-08-17'}}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "\n", | |
| "\n", | |
| "# Define search parameters\n", | |
| "search_params = {\n", | |
| " \"metric_type\": \"L2\",\n", | |
| " \"params\": {\"nprobe\": 10}\n", | |
| "}\n", | |
| "\n", | |
| "# Use first record as search record\n", | |
| "query_vector = [data[0][\"vector\"]]\n", | |
| "\n", | |
| "# Execute the search on the 'vector' field\n", | |
| "search_results = milvus_client.search(\n", | |
| " COLLECTION_NAME,\n", | |
| " data=query_vector,\n", | |
| " anns_field=\"vector\",\n", | |
| " output_fields=[\"id\", \"DateObserved\", \"HourObserved\", \"Latitude\", \"Longitude\", \"ParameterName\", \"ZipCode\", \"AQI\", \"details\", \"location\"],\n", | |
| " search_params=search_params,\n", | |
| " limit=10\n", | |
| ")\n", | |
| "\n", | |
| "# Print search results\n", | |
| "for hits in search_results:\n", | |
| " for hit in hits:\n", | |
| " print(f\"Hit: {hit}\")\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment