-
-
Save jrhumberto/3aca8883a207848cff7ce846796e31df to your computer and use it in GitHub Desktop.
Example of GLiNER2 multi-task schema on a finance-flavored dummy memo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # /// script | |
| # requires-python = ">=3.10" | |
| # dependencies = [ | |
| # "gliner2", | |
| # "rich", | |
| # ] | |
| # /// | |
| """Show GLiNER2 multi-task schema on a finance-flavored dummy memo. | |
| Usage example (result shown as a Python repr; the script itself prints it as JSON): | |
| $ python gliner_demo.py | |
| { | |
| 'dividend_outlook': [{'record_date': 'March 18', 'cash_per_share': '$0.34', 'payout_ratio': '55% and 60%', 'policy_notes': []}], | |
| 'forward_watchlist': [{'trigger': ['unless regulators request a pause'], 'time_horizon': [], 'named_entity': ['dividend ladder']}, {'trigger': [], 'time_horizon': [], 'named_entity': ['execution on Asia feeder routes']}], | |
| 'entities': { | |
| 'asset_manager': ['HarborView Capital'], | |
| 'portfolio_company': ['Anchor Freight', 'Seaside Renewables'], | |
| 'policy_signal': ['steady distributions'], | |
| 'regulatory_body': ['maritime authority', 'BlueCurrent Bank'], | |
| 'liquidity_cushion': ['$1.6B'] | |
| }, | |
| 'sector_focus': 'finance', | |
| 'relation_extraction': { | |
| 'allocates_to': [('HarborView Capital', 'Seaside Renewables'), ('HarborView Capital', 'Anchor Freight')], | |
| 'overseen_by': [('revolver', 'BlueCurrent Bank')], | |
| 'funds_from': [('Anchor Freight', 'Anchor Freight')], | |
| 'signal_axes': [('dividend_policy', 0.95), ('liquidity_guardrails', 0.69), ('regulatory_watch', 0.75), ('growth_investment', 0.65)] | |
| } | |
| } | |
| """ | |
| from __future__ import annotations | |
| from textwrap import dedent | |
| from gliner2 import GLiNER2 | |
| from rich.console import Console | |
# Model identifier that main() passes to GLiNER2.from_pretrained().
| MODEL_ID = "fastino/gliner2-base-v1" | |
# Shared Rich console; main() uses it for the status lines and the JSON result.
| console = Console() | |
| # Synthetic memo referencing dividend policy, liquidity, and oversight; the only text main() feeds to the extractor (dedent/strip tidy the literal's whitespace). | |
| DUMMY_REPORT = dedent( | |
| """ | |
| HarborView Capital reiterated its quarterly outlook after the portfolio review with | |
| Seaside Renewables and Anchor Freight. The capital committee kept the cash dividend | |
| at $0.34 per unit and guided to a payout ratio between 55% and 60% as shipping cash | |
| flows normalize. Management said the liquidity cushion sits at $1.6B including an | |
| undrawn revolver that BlueCurrent Bank continues to oversee. The memo noted that | |
| HarborView allocates to Seaside to accelerate grid interconnects while Anchor Freight | |
| funds the port automation push. The policy signal emphasized steady distributions | |
| while keeping dry powder for bolt-on deals. Record date is penciled in for March 18 | |
| with payment the following week unless regulators request a pause. Watchlist items | |
| include board approval of the dividend ladder, execution on Asia feeder routes, and | |
| the impact of a stricter Basel-oriented liquidity floor set by the maritime authority. | |
| """ | |
| ).strip() | |
def build_schema(extractor: GLiNER2):
    """Assemble the demo's multi-task extraction schema.

    Starting from ``extractor.create_schema()``, the schema receives, in order:
    five entity types, three relation types, the single-label ``sector_focus``
    classifier, the multi-label ``signal_axes`` classifier, and two structures
    (``dividend_outlook`` and ``forward_watchlist``).

    Args:
        extractor: Object whose ``create_schema()`` supplies the schema builder.

    Returns:
        The populated schema object.
    """
    entity_types = {
        "asset_manager": "Named fund or investment firm steering capital allocation",
        "portfolio_company": "Operating companies receiving capital or attention",
        "policy_signal": "Language that hints at payout or capital allocation posture",
        "regulatory_body": "Oversight entities or watchdogs mentioned explicitly",
        "liquidity_cushion": "References to liquidity buffers, coverage, or reserves",
    }
    relation_types = {
        "allocates_to": "Source allocates capital or focus toward a target entity",
        "overseen_by": "Entity under oversight from a regulator or committee",
        "funds_from": "Capital inflow from a specified source into a target",
    }
    sector_labels = {
        "finance": "Capital markets, funds, balance sheets, distributions",
        "healthcare": "Providers, devices, and life sciences topics",
        "sports": "Teams, leagues, games, or athletic venues",
    }
    signal_labels = {
        "dividend_policy": "Mentions payouts, cash per share, or payout ratios",
        "liquidity_guardrails": "Speaks about cushions, revolvers, or funding buffers",
        "regulatory_watch": "References oversight, regulators, or authority action",
        "growth_investment": "Signals expansion, capex, bolt-ons, or automation spend",
        "operational_risk": "Flags execution risk or disruptions",
    }
    # Structure name -> ordered (field name, dtype) pairs; dict order fixes
    # the order in which the structures are registered.
    structure_fields = {
        "dividend_outlook": (
            ("record_date", "str"),
            ("cash_per_share", "str"),
            ("payout_ratio", "str"),
            ("policy_notes", "list"),
        ),
        "forward_watchlist": (
            ("trigger", "list"),
            ("time_horizon", "list"),
            ("named_entity", "list"),
        ),
    }

    schema = extractor.create_schema()
    schema.entities(entity_types, threshold=0.35)

    # Relations use a slightly lower threshold so the demo surfaces more of them.
    schema.relations(relation_types, threshold=0.3)

    # Single-label classification with easy-to-read categories.
    schema.classification("sector_focus", sector_labels, cls_threshold=0.3)

    # Multi-label classification; the memo should light up at least two axes.
    schema.classification(
        "signal_axes",
        signal_labels,
        multi_label=True,
        cls_threshold=0.28,
    )

    for structure_name, fields in structure_fields.items():
        builder = schema.structure(structure_name)
        # Call each .field() on the previous call's return value, exactly as
        # a fluent chain would.
        for field_name, field_dtype in fields:
            builder = builder.field(field_name, dtype=field_dtype)

    return schema
def main() -> None:
    """Load the model on CPU, run the demo schema over the memo, print JSON."""
    model = GLiNER2.from_pretrained(MODEL_ID)
    model.to("cpu")
    demo_schema = build_schema(model)

    for status_line in (
        f"[bold]Model:[/bold] {MODEL_ID} on CPU",
        "Running multi-task extraction on dummy memo...\n",
    ):
        console.print(status_line)

    extraction = model.extract(DUMMY_REPORT, schema=demo_schema)
    console.print_json(data=extraction)


# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment