POST /v1/auth/login
- User login to exchange for access token and refresh token
- Email/password authentication
- Returns: JWT access_token (1h), refresh_token (7d), user profile
POST /v1/auth/refresh
- Extend the lifetime of current session
- Requires: valid refresh_token
- Returns: new access_token with extended expiry
GET /v1/auth/me
- Fetch details for the current user
- Organization and project access levels
- Role-based feature availability
GET /v1/organizations
- List organizations accessible to current user
- Organization details and membership info
- Pagination and filtering support
PUT /v1/organizations/{id}/members/{user_id}
- Add or update user membership in organization
- Role assignment and permission management
- Requires admin privileges
POST /v1/files
- Upload and create a new file
- Multipart upload with project assignment
- Automatic virus scanning and validation
- Returns: file_id, status, processing_job_id
GET /v1/files
- Query and list all files with pagination
- Filter by status, date range, file type
- Paginated results with metadata
- Returns: files array, pagination info
GET /v1/files/{id}
- Fetch details for a file
- Processing history and audit trail
- Download URLs with access control
- Activity count and processing results
GET /v1/files/{id}/status
- Watch the status of a file
- Progress indicators: parsing → validation → AI matching → calculation
- Error details and retry information
DELETE /v1/files/{id}
- Remove a file from the system
- Soft delete with audit trail
- Cascades to related activities and results
- Returns: deletion confirmation and cleanup status
GET /v1/emissions
- Query calculated activity emission by name, type and date range
- Paginated, sort by title, type and date range
- Aggregated totals and summary statistics
- Returns: results array, pagination, summary
GET /v1/emissions/{id}
- Fetch detailed calculation, EF match result and processing history
- Emission factor match details and confidence score
- Calculation formula and intermediate values
- Processing history and audit trail
DELETE /v1/emissions/{id}
- Remove an emission item from the system
- Soft delete with audit trail
- Requires appropriate permissions
- Returns: deletion confirmation
POST /v1/emissions/reports
- Generate and show report based on user's criteria asynchronously
- Customizable grouping and filtering options
- Include detailed calculations or summary only
- Returns: report_id, generation status, estimated completion
GET /v1/emissions/reports
- List all emission reports with pagination
- Filter by status, creation date, report type
- Returns: reports array, pagination info
GET /v1/emissions/reports/{id}
- Fetch details for an emission report
- Report metadata, generation status, download info
- Access permissions and audit trail
DELETE /v1/emissions/reports/{id}
- Remove an emission report from the system
- Soft delete with cleanup of associated files
- Returns: deletion confirmation
CREATE TABLE organizations (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name TEXT NOT NULL,
industry TEXT,
country TEXT,
subscription_tier TEXT DEFAULT 'basic',
data_retention_days INTEGER DEFAULT 365,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
deleted_at TIMESTAMP WITH TIME ZONE
);CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
email TEXT UNIQUE NOT NULL,
password_hash TEXT NOT NULL,
first_name TEXT,
last_name TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
deleted_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_users_email ON users(email);CREATE TABLE user_organization_roles (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
organization_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE,
role TEXT NOT NULL DEFAULT 'user', -- admin|manager|user|viewer
permissions JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
deleted_at TIMESTAMP WITH TIME ZONE,
UNIQUE(user_id, organization_id)
);
-- Multi-tenant access indexes
CREATE INDEX idx_user_org_roles_user ON user_organization_roles(user_id);
CREATE INDEX idx_user_org_roles_org_role ON user_organization_roles(organization_id, role);-- TODO: Projects scope can be added in the future for enhanced data organization
CREATE TABLE files (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
organization_id UUID NOT NULL REFERENCES organizations(id),
user_id UUID NOT NULL REFERENCES users(id),
-- File metadata
filename TEXT NOT NULL,
object_storage_path TEXT NOT NULL, -- Full S3/GCS path: s3://bucket/org-id/files/file-id
file_size BIGINT NOT NULL,
mime_type TEXT,
-- Processing status tracking
status TEXT DEFAULT 'uploaded', -- uploaded|parsing|ai_matching|calculating|completed|failed
error_message TEXT,
-- Timestamps for SLA monitoring
processing_started_at TIMESTAMP WITH TIME ZONE,
processing_completed_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
deleted_at TIMESTAMP WITH TIME ZONE
);
-- Multi-tenant isolation and performance indexes
CREATE INDEX idx_files_org_user ON files(organization_id, user_id);
-- Filtering by status and date range
CREATE INDEX idx_files_status_created ON files(status, created_at);
-- Organization and status for dashboard views
CREATE INDEX idx_files_org_status ON files(organization_id, status);CREATE TABLE emissions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
organization_id UUID NOT NULL REFERENCES organizations(id),
file_id UUID NOT NULL REFERENCES files(id) ON DELETE CASCADE,
-- Emission data
activity_name TEXT NOT NULL,
activity_type TEXT,
activity_description TEXT,
unit TEXT NOT NULL,
volume FLOAT NOT NULL,
-- Calculation results
emission_factor_id UUID REFERENCES emission_factors(id),
calculated_emission FLOAT NOT NULL,
confidence_score FLOAT,
-- Full-text search support
-- Need to install extension: CREATE EXTENSION IF NOT EXISTS pg_trgm;
search_vector tsvector GENERATED ALWAYS AS (
to_tsvector('english',
COALESCE(activity_name, '') || ' ' ||
COALESCE(activity_description, '') || ' ' ||
COALESCE(activity_type, '')
)
) STORED,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
deleted_at TIMESTAMP WITH TIME ZONE
);
-- Performance and search indexes
CREATE INDEX idx_emissions_org_file ON emissions(organization_id, file_id);
-- Filtering by activity type and date range
CREATE INDEX idx_emissions_type_date ON emissions(activity_type, created_at);
-- Full-text search (Generalized Inverted Index)
-- GIN is designed for handling cases where the items to be indexed are composite values, and the queries to be handled by the index need to search for element values that appear within the composite items.
CREATE INDEX idx_emissions_search ON emissions USING GIN(search_vector);
-- Full-text search by activity name
CREATE INDEX idx_emissions_name ON emissions(activity_name);Option 1: Partitioning Pro: Horizontal scaling, better query performance for large datasets Con: Complexity, significant operational overhead
-- Range partitioning by date for better query performance
CREATE TABLE emissions_partitioned (LIKE emissions INCLUDING ALL) PARTITION BY RANGE (created_at);
-- Monthly partitions (example for 2023)
CREATE TABLE emissions_2023_01 PARTITION OF emissions_partitioned FOR VALUES FROM ('2023-01-01') TO ('2023-02-01');
-- Query example
SELECT * FROM emissions_partitioned WHERE created_at >= '2023-01-01' AND created_at < '2023-02-01';
-- Or Partitioning for scale (by organization)
CREATE TABLE emissions_partitioned (LIKE emissions INCLUDING ALL) PARTITION BY HASH (organization_id);
-- Query example
SELECT * FROM emissions_partitioned WHERE organization_id = 'some_org_id';Option 2: Migrating to Google Cloud Spanner or AWS Aurora for better performance and scale Pro: Better performance, easier to scale, no operational overhead Con: Introduces cloud cost, migration effort
Option 3: Time-series Database (TimescaleDB) Pro: Purpose-built for time-series emission data, automatic partitioning, excellent compression Con: Additional learning curve, PostgreSQL extension dependency
-- TimescaleDB extension assumed installed
-- Convert to TimescaleDB hypertable
SELECT create_hypertable('emissions', 'created_at', if_not_exists => TRUE);
-- Query example
SELECT * FROM emissions WHERE created_at >= '2023-01-01' AND created_at < '2023-02-01';Option 4: Hybrid Architecture - Hot/Cold Data Separation Pro: Keep recent data fast, archive old data cheaply, maintains single interface Con: Application complexity for data lifecycle management
-- Hot data (last 12 months) in PostgreSQL
-- Cold data (older) in BigQuery/Redshift for analytics
-- Unified API layer abstracts the complexityCREATE TABLE emission_factors (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name TEXT NOT NULL,
category TEXT NOT NULL,
unit TEXT NOT NULL,
value_per_unit FLOAT NOT NULL,
region TEXT,
source TEXT,
source_year INTEGER,
-- Full-text search for AI matching
search_vector tsvector GENERATED ALWAYS AS (
to_tsvector('english', name || ' ' || COALESCE(category, ''))
) STORED,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
deleted_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_emission_factors_search ON emission_factors USING GIN(search_vector);
CREATE INDEX idx_emission_factors_category_region ON emission_factors(category, region);
CREATE INDEX idx_emission_factors_active ON emission_factors(is_active);CREATE TABLE reports (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
organization_id UUID NOT NULL REFERENCES organizations(id),
user_id UUID NOT NULL REFERENCES users(id),
-- Report metadata
name TEXT NOT NULL,
description TEXT,
report_type TEXT DEFAULT 'emission_summary', -- emission_summary|detailed|custom
-- Query that generated the report
query_criteria JSONB NOT NULL, -- Stores filters, date ranges, grouping options
-- Object storage reference
object_storage_path TEXT, -- Full S3/GCS path: s3://bucket/org-id/reports/report-id.pdf
file_format TEXT, -- pdf|excel|csv
file_size BIGINT,
-- Generation status
status TEXT DEFAULT 'generating', -- generating|completed|failed
generation_started_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
generation_completed_at TIMESTAMP WITH TIME ZONE,
error_message TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
deleted_at TIMESTAMP WITH TIME ZONE
);
-- Performance indexes
CREATE INDEX idx_reports_org_user ON reports(organization_id, user_id);
CREATE INDEX idx_reports_status_created ON reports(status, created_at);
CREATE INDEX idx_reports_type ON reports(report_type);- Organization-level partitioning: All data isolated by organization_id
- Row-level security: PostgreSQL RLS policies for data access control
- Index optimization: Composite indexes starting with organization_id
- Generated tsvector columns: Automatic search vector generation
- GIN indexes: High-performance full-text search
- Multi-language support: Configurable text search configurations
- Semantic search ready: Vector embeddings support for AI-powered search
- Partitioning strategy: Hash partitioning by organization_id for horizontal scaling
- Query optimization: Composite indexes for common query patterns
- Connection pooling: PgBouncer with organization-aware routing
- Read replicas: Separate read workloads from write operations
- Organization Isolation: Complete data separation using organization_id partitioning
- Multi-Org User Management: Junction table enabling users across multiple organizations with different roles
- Soft Delete Strategy: Standardized deleted_at timestamps for audit compliance and data recovery
- Role-Based Access Control: Granular permissions per organization membership
- Metadata vs Content Separation: Files table stores metadata, object_storage_path references S3/GCS
- Processing Status Tracking: Simplified status flow without progress percentages for cleaner state management
- Audit Trail: Complete created_at/updated_at/deleted_at timestamps across all entities
- Error Recovery: Automatic retry with exponential backoff and status persistence
- Direct File-to-Emission Relationship: Emissions link back to source files for complete traceability
- AI Provider Constraints: Respect external AI service limits (10 concurrent, ~10min response)
- Confidence Scoring: FLOAT-based confidence scores for AI matching quality assessment
- Calculation Transparency: Store emission_factor_id and calculation_method for audit trails
- PostgreSQL Best Practices: TEXT fields instead of VARCHAR, FLOAT instead of DECIMAL for simplicity
- Full-text Search: GIN indexes on tsvector columns for activity and emission factor matching
- Composite Indexes: Optimized for common query patterns (org + status, org + file, type + date)
- Scaling Options: Range partitioning by date, read replicas, or cloud migration for multi-million records
- Query Criteria Storage: JSONB field retains exact filters/parameters used to generate reports
- Async Report Generation: Status tracking for long-running report creation processes
- Object Storage Integration: Generated reports stored with full S3/GCS paths for secure access
- Format Flexibility: Support for PDF, Excel, CSV with appropriate MIME type handling
This design prioritizes operational simplicity while maintaining enterprise-grade multi-tenancy, audit compliance, and scalability.