Skip to content

Instantly share code, notes, and snippets.

View maulikmadhavi's full-sized avatar
🎯
Focusing

Maulik Madhavi maulikmadhavi

🎯
Focusing
View GitHub Profile
@maulikmadhavi
maulikmadhavi / data.yaml
Created May 25, 2025 04:40
This script loads configuration from a YAML file and exposes it as a namedtuple
single_cls: false
img_size: 640
rect: false
class_names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
@maulikmadhavi
maulikmadhavi / async_call.py
Created February 1, 2025 02:07
Calling the Groq API and sending multiple queries asynchronously
import os
import requests
import time
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from transformers import AutoTokenizer
# Set your Groq API key
api_key = os.getenv("GROQ_API_KEY")
# %%
import os
from pathlib import Path
import cv2
import decord
import numpy as np
import pandas as pd
from decord import VideoReader, cpu
from tqdm import tqdm
import numpy as np
import torch
import torchvision.transforms as T
from decord import VideoReader, cpu
from PIL import Image
from torchvision.transforms.functional import InterpolationMode
from transformers import AutoModel, AutoTokenizer
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)
@maulikmadhavi
maulikmadhavi / video_vlm_vis.py
Created September 29, 2024 16:07
video rendering and confidence visualization
"""
This code sets up a Dash web application that displays a video player and a confidence plot.
The `get_concatenated_image` function takes a center frame index and returns a base64-encoded image string and a list of frame indices. The image is created by concatenating 11 frames (5 before and 5 after the center frame) horizontally, with each frame downsampled to 320x320 pixels.
The Dash layout includes a slider to select the center frame, an image display, and a confidence plot. The `update_image` callback function is triggered when the slider value changes, and it updates the image and confidence plot based on the selected center frame.
The confidence scores for "Yes" and "No" are randomly generated for each frame, and the confidence plot shows the scores for the frames around the selected center frame.
"""
from dash import Dash, html, dcc, Input, Output
@maulikmadhavi
maulikmadhavi / video_process_decord_ml.py
Last active September 29, 2024 14:11
decord efficient video reading
import decord
import numpy as np
class EfficientVideoReader:
def __init__(self, video_path, skip_frames=1, max_frames=None, batch_size=32):
"""
Initialize the EfficientVideoReader.
Args:
video_path (str): Path to the video file
# I have a matrix of 1s and 0s. The size of the matrix is 3x4.
# If any two elements that are adjacent along a row or a column (not diagonally) are both equal to 1, the code should return 1; otherwise it should return 0.
# For example:
# x1 = [[1, 0, 0, 0],[1, 0, 0, 0],[0, 0, 0, 0]] => 1
# x2 = [[1, 0, 0, 0],[0, 0, 0, 0],[0, 0, 0, 0]] => 0
# x3 = [[1, 0, 0, 0],[0, 1, 0, 0],[0, 0, 0, 0]] => 0
# x4 = [[1, 0, 0, 0],[0, 0, 0, 0],[0, 0, 0, 1]] => 0
import numpy as np
def majority_count_sliding_window(binary_vector, window_size=5):
"""
Applies a sliding window to the binary vector and sets the output to 1 if the majority
of the elements in the window are 1s.
Parameters:
binary_vector (np.array): The input binary vector.
window_size (int): The size of the sliding window.
@maulikmadhavi
maulikmadhavi / siglip_matching.py
Created August 26, 2024 18:30
siglip_matching
from transformers import SiglipModel
class SigLipSimilarity:
def __init__(self):
device = "cuda" if torch.cuda.is_available() else "cpu"
self.model = SiglipModel.from_pretrained(
"google/siglip-so400m-patch14-384",
device_map=device,
attn_implementation="flash_attention_2",
torch_dtype=torch.float16,
)