{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "36453043", "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"" ] }, { "cell_type": "code", "execution_count": 2, "id": "7671ce13", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "pdf_directory = \"./pdfs\"\n", "pdf_files = [str(f.resolve())for f in Path(pdf_directory).glob(\"*.pdf\")][:10]" ] }, { "cell_type": "code", "execution_count": 3, "id": "80516b16", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken: 68.44 seconds for 10 files with 275 pages @ 4.02 pages per second\n" ] } ], "source": [ "import pymupdf\n", "from PIL import Image\n", "import io\n", "images = []\n", "dpi = 144\n", "\n", "t0 = time.perf_counter()\n", "for pdf_path in pdf_files:\n", " pdf_document = pymupdf.open(pdf_path)\n", "\n", " zoom = dpi / 72.0\n", " matrix = pymupdf.Matrix(zoom, zoom)\n", "\n", " for page_num in range(pdf_document.page_count):\n", " page = pdf_document[page_num]\n", "\n", " pixmap = page.get_pixmap(matrix=matrix, alpha=False)\n", " Image.MAX_IMAGE_PIXELS = None\n", "\n", " img_data = pixmap.tobytes(\"png\")\n", " img = Image.open(io.BytesIO(img_data))\n", " images.append(img)\n", " \n", " pdf_document.close()\n", "\n", "t1 = time.perf_counter()\n", "print(f'Time taken: {(t1 - t0):.2f} seconds for {len(pdf_files)} files with {len(images)} pages @ {(len(images) / (t1 - t0)):.2f} pages per second')\n", "\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "e226ce0a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO 10-22 17:04:03 [__init__.py:225] Automatically detected platform cuda.\n", "INFO 10-22 17:04:06 [utils.py:253] non-default args: {'trust_remote_code': True, 'disable_log_stats': True, 'limit_mm_per_prompt': {'image': 1}, 'model': 'deepseek-ai/DeepSeek-OCR'}\n", "INFO 10-22 17:04:06 [model.py:667] Resolved architecture: DeepseekOCRForCausalLM\n", "INFO 10-22 17:04:06 [model.py:1756] Using max model len 8192\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO 10-22 17:04:07 [scheduler.py:219] Chunked prefill is enabled with max_num_batched_tokens=8192.\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m INFO 10-22 17:04:09 [core.py:716] Waiting for init message from front-end.\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m INFO 10-22 17:04:09 [core.py:94] Initializing a V1 LLM engine (v0.11.1rc3.dev7+ga0003b56b) with config: model='deepseek-ai/DeepSeek-OCR', speculative_config=None, tokenizer='deepseek-ai/DeepSeek-OCR', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser='', enable_in_reasoning=False), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=deepseek-ai/DeepSeek-OCR, enable_prefix_caching=True, chunked_prefill_enabled=True, pooler_config=None, compilation_config={'level': None, 'mode': 3, 'debug_dump_path': None, 'cache_dir': '', 'backend': 'inductor', 'custom_ops': ['none'], 'splitting_ops': ['vllm::unified_attention', 'vllm::unified_attention_with_output', 'vllm::unified_mla_attention', 'vllm::unified_mla_attention_with_output', 'vllm::mamba_mixer2', 'vllm::mamba_mixer', 'vllm::short_conv', 'vllm::linear_attention', 'vllm::plamo2_mamba_mixer', 'vllm::gdn_attention', 'vllm::sparse_attn_indexer'], 'use_inductor': None, 'compile_sizes': [], 'inductor_compile_config': {'enable_auto_functionalized_v2': False, 'combo_kernels': True, 'benchmark_combo_kernel': True}, 'inductor_passes': {}, 'cudagraph_mode': , 'use_cudagraph': True, 'cudagraph_num_of_warmups': 1, 'cudagraph_capture_sizes': [512, 504, 496, 488, 480, 472, 464, 456, 448, 440, 432, 424, 416, 408, 400, 392, 384, 376, 368, 360, 352, 344, 336, 328, 320, 312, 304, 296, 288, 280, 272, 264, 256, 248, 240, 232, 224, 216, 208, 200, 192, 184, 176, 168, 160, 152, 144, 136, 128, 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 4, 2, 1], 'cudagraph_copy_inputs': False, 'full_cuda_graph': True, 'cudagraph_specialize_lora': True, 'use_inductor_graph_partition': False, 'pass_config': {}, 'max_capture_size': 512, 'local_cache_dir': None}\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m INFO 10-22 17:04:10 [parallel_state.py:1325] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0\n", "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n", "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n", "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n", "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n", "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n", "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m INFO 10-22 17:04:14 [gpu_model_runner.py:2860] Starting to load model deepseek-ai/DeepSeek-OCR...\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m INFO 10-22 17:04:14 [layer.py:518] MultiHeadAttention attn_backend: _Backend.FLASH_ATTN, use_upstream_fa: False\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m INFO 10-22 17:04:14 [cuda.py:403] Using Flash Attention backend on V1 engine.\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m WARNING 10-22 17:04:14 [vllm.py:867] `torch.compile` is turned on, but the model deepseek-ai/DeepSeek-OCR does not support it. Please open an issue on GitHub if you want it to be supported.\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m WARNING 10-22 17:04:14 [vllm.py:867] `torch.compile` is turned on, but the model deepseek-ai/DeepSeek-OCR does not support it. Please open an issue on GitHub if you want it to be supported.\n", "\u001b[1;36m(EngineCore_DP0 pid=2185600)\u001b[0;0m INFO 10-22 17:04:14 [weight_utils.py:419] Using model weights format ['*.safetensors']\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fe6c779f99b64deaa7589061feeeeda7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading safetensors checkpoint shards: 0% Completed | 0/1 [00:00\\n<|grounding|>Convert the document to markdown.\",\n", ") -> list[dict]:\n", " \"\"\"Create chat message for OCR processing.\"\"\"\n", " # Convert to PIL Image if needed\n", " pil_img = image\n", "\n", " # Convert to RGB\n", " pil_img = pil_img.convert(\"RGB\")\n", "\n", " # Convert to base64 data URI\n", " buf = io.BytesIO()\n", " pil_img.save(buf, format=\"PNG\")\n", " data_uri = f\"data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}\"\n", "\n", " # Return message in vLLM format\n", " return [\n", " {\n", " \"role\": \"user\",\n", " \"content\": [\n", " {\"type\": \"image_url\", \"image_url\": {\"url\": data_uri}},\n", " {\"type\": \"text\", \"text\": prompt},\n", " ],\n", " }\n", " ]\n" ] }, { "cell_type": "markdown", "id": "9911cfbd", "metadata": {}, "source": [ "## Batching" ] }, { "cell_type": "code", "execution_count": 43, "id": "16a8fc7b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Processed 1 pages in 0.33s; Preprocess @ 12.21 p/s, Model @ 4.04 p/s, Combined @ 3.03 p/s\n", "Processed 1 pages in 0.38s; Preprocess @ 5.87 p/s, Model @ 4.79 p/s, Combined @ 2.64 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 11.96 p/s, Model @ 5.01 p/s, Combined @ 3.53 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 8.30 p/s, Model @ 4.88 p/s, Combined @ 3.07 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 9.86 p/s, Model @ 5.02 p/s, Combined @ 3.33 p/s\n", "Processed 1 pages in 0.39s; Preprocess @ 5.50 p/s, Model @ 4.84 p/s, Combined @ 2.57 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 12.00 p/s, Model @ 4.78 p/s, Combined @ 3.42 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 8.80 p/s, Model @ 4.88 p/s, Combined @ 3.14 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 8.47 p/s, Model @ 4.91 p/s, Combined @ 3.11 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 13.42 p/s, Model @ 4.97 p/s, Combined @ 3.63 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 9.44 p/s, Model @ 4.91 p/s, Combined @ 3.23 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 10.11 p/s, Model @ 4.93 p/s, Combined @ 3.32 p/s\n", "Processed 1 pages in 0.50s; Preprocess @ 3.56 p/s, Model @ 4.65 p/s, Combined @ 2.02 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 8.18 p/s, Model @ 4.74 p/s, Combined @ 3.00 p/s\n", "Processed 1 pages in 0.34s; Preprocess @ 7.57 p/s, Model @ 4.77 p/s, Combined @ 2.93 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 13.05 p/s, Model @ 4.97 p/s, Combined @ 3.60 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 13.34 p/s, Model @ 4.99 p/s, Combined @ 3.63 p/s\n", "Processed 1 pages in 0.44s; Preprocess @ 4.33 p/s, Model @ 4.81 p/s, Combined @ 2.28 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 8.43 p/s, Model @ 4.95 p/s, Combined @ 3.12 p/s\n", "Processed 1 pages in 0.39s; Preprocess @ 5.43 p/s, Model @ 4.77 p/s, Combined @ 2.54 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 11.59 p/s, Model @ 5.00 p/s, Combined @ 3.49 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 10.67 p/s, Model @ 4.83 p/s, Combined @ 3.32 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 8.80 p/s, Model @ 4.95 p/s, Combined @ 3.17 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 10.81 p/s, Model @ 4.95 p/s, Combined @ 3.40 p/s\n", "Processed 1 pages in 0.35s; Preprocess @ 9.79 p/s, Model @ 4.06 p/s, Combined @ 2.87 p/s\n", "Processed 1 pages in 0.34s; Preprocess @ 9.78 p/s, Model @ 4.25 p/s, Combined @ 2.96 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 11.07 p/s, Model @ 4.84 p/s, Combined @ 3.37 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 11.80 p/s, Model @ 4.82 p/s, Combined @ 3.42 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 12.51 p/s, Model @ 4.92 p/s, Combined @ 3.53 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 13.48 p/s, Model @ 4.96 p/s, Combined @ 3.63 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 12.07 p/s, Model @ 4.98 p/s, Combined @ 3.52 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 11.53 p/s, Model @ 4.86 p/s, Combined @ 3.42 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 10.74 p/s, Model @ 4.95 p/s, Combined @ 3.39 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 9.69 p/s, Model @ 4.87 p/s, Combined @ 3.24 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 11.26 p/s, Model @ 4.79 p/s, Combined @ 3.36 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 10.48 p/s, Model @ 4.93 p/s, Combined @ 3.35 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 9.72 p/s, Model @ 4.91 p/s, Combined @ 3.26 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 13.48 p/s, Model @ 5.03 p/s, Combined @ 3.66 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 12.57 p/s, Model @ 5.07 p/s, Combined @ 3.61 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.08 p/s, Model @ 4.47 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.45 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.73s; Preprocess @ 2.01 p/s, Model @ 4.37 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.02 p/s, Model @ 4.49 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.73s; Preprocess @ 2.02 p/s, Model @ 4.29 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.49 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.46 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.02 p/s, Model @ 4.50 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.73s; Preprocess @ 1.98 p/s, Model @ 4.52 p/s, Combined @ 1.37 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.60 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.02 p/s, Model @ 4.40 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.03 p/s, Model @ 4.57 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.59 p/s, Combined @ 1.43 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.03 p/s, Model @ 4.64 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.58 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 1.98 p/s, Model @ 4.61 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.04 p/s, Model @ 4.44 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.56 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.60 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.63 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.07 p/s, Model @ 4.39 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.03 p/s, Model @ 4.60 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.04 p/s, Model @ 4.46 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.41 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.08 p/s, Model @ 4.51 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.09 p/s, Model @ 4.59 p/s, Combined @ 1.44 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.04 p/s, Model @ 4.37 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.53 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.61 p/s, Combined @ 1.43 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.02 p/s, Model @ 4.51 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.10 p/s, Model @ 4.50 p/s, Combined @ 1.43 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.07 p/s, Model @ 4.43 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.00 p/s, Model @ 4.49 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 1.99 p/s, Model @ 4.52 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.03 p/s, Model @ 4.48 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.43 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.51 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.07 p/s, Model @ 4.47 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.68s; Preprocess @ 2.03 p/s, Model @ 5.24 p/s, Combined @ 1.46 p/s\n", "Processed 1 pages in 0.69s; Preprocess @ 2.00 p/s, Model @ 5.27 p/s, Combined @ 1.45 p/s\n", "Processed 1 pages in 0.68s; Preprocess @ 2.05 p/s, Model @ 5.26 p/s, Combined @ 1.47 p/s\n", "Processed 1 pages in 0.67s; Preprocess @ 2.10 p/s, Model @ 5.19 p/s, Combined @ 1.49 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.02 p/s, Model @ 4.48 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.68s; Preprocess @ 2.03 p/s, Model @ 5.27 p/s, Combined @ 1.46 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.05 p/s, Model @ 4.37 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.01 p/s, Model @ 4.55 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.53 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.00 p/s, Model @ 4.58 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.53 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.67s; Preprocess @ 2.06 p/s, Model @ 5.31 p/s, Combined @ 1.49 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.49 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.03 p/s, Model @ 4.53 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.73s; Preprocess @ 1.98 p/s, Model @ 4.39 p/s, Combined @ 1.37 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 1.98 p/s, Model @ 4.59 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.11 p/s, Model @ 4.50 p/s, Combined @ 1.44 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.52 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.47 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.49 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.52 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.08 p/s, Model @ 4.59 p/s, Combined @ 1.43 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.11 p/s, Model @ 4.51 p/s, Combined @ 1.44 p/s\n", "Processed 1 pages in 0.69s; Preprocess @ 2.09 p/s, Model @ 4.64 p/s, Combined @ 1.44 p/s\n", "Processed 1 pages in 0.67s; Preprocess @ 2.07 p/s, Model @ 5.27 p/s, Combined @ 1.49 p/s\n", "Processed 1 pages in 0.68s; Preprocess @ 2.01 p/s, Model @ 5.39 p/s, Combined @ 1.46 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.59 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.67 p/s, Combined @ 1.44 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.05 p/s, Model @ 4.65 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.01 p/s, Model @ 4.67 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.57 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.03 p/s, Model @ 4.62 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.00 p/s, Model @ 4.57 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.56 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.00 p/s, Model @ 4.57 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.04 p/s, Model @ 4.35 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.09 p/s, Model @ 4.56 p/s, Combined @ 1.43 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.58 p/s, Combined @ 1.43 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.50 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.02 p/s, Model @ 4.58 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.06 p/s, Model @ 4.62 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.58 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.03 p/s, Model @ 4.56 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.02 p/s, Model @ 4.57 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.58 p/s, Combined @ 1.43 p/s\n", "Processed 1 pages in 0.67s; Preprocess @ 2.06 p/s, Model @ 5.35 p/s, Combined @ 1.49 p/s\n", "Processed 1 pages in 0.68s; Preprocess @ 2.05 p/s, Model @ 5.20 p/s, Combined @ 1.47 p/s\n", "Processed 1 pages in 0.68s; Preprocess @ 2.04 p/s, Model @ 5.27 p/s, Combined @ 1.47 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.07 p/s, Model @ 4.31 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.59 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.07 p/s, Model @ 4.56 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.03 p/s, Model @ 4.61 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.46 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.45 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.04 p/s, Model @ 4.40 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.04 p/s, Model @ 4.42 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.07 p/s, Model @ 4.42 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.07 p/s, Model @ 4.43 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.02 p/s, Model @ 4.50 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.47 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.47 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.51 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.01 p/s, Model @ 4.51 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.48 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.11 p/s, Model @ 4.38 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.09 p/s, Model @ 4.37 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.04 p/s, Model @ 4.56 p/s, Combined @ 1.41 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.09 p/s, Model @ 4.41 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.06 p/s, Model @ 4.41 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.03 p/s, Model @ 4.35 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.02 p/s, Model @ 4.42 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.08 p/s, Model @ 4.45 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.73s; Preprocess @ 1.97 p/s, Model @ 4.47 p/s, Combined @ 1.37 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.00 p/s, Model @ 4.48 p/s, Combined @ 1.38 p/s\n", "Processed 1 pages in 0.71s; Preprocess @ 2.05 p/s, Model @ 4.49 p/s, Combined @ 1.40 p/s\n", "Processed 1 pages in 0.70s; Preprocess @ 2.06 p/s, Model @ 4.57 p/s, Combined @ 1.42 p/s\n", "Processed 1 pages in 0.72s; Preprocess @ 2.02 p/s, Model @ 4.48 p/s, Combined @ 1.39 p/s\n", "Processed 1 pages in 0.69s; Preprocess @ 2.13 p/s, Model @ 4.51 p/s, Combined @ 1.45 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 12.26 p/s, Model @ 5.02 p/s, Combined @ 3.56 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 12.39 p/s, Model @ 5.01 p/s, Combined @ 3.57 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 12.93 p/s, Model @ 5.03 p/s, Combined @ 3.62 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 13.14 p/s, Model @ 5.15 p/s, Combined @ 3.70 p/s\n", "Processed 1 pages in 0.26s; Preprocess @ 18.69 p/s, Model @ 4.86 p/s, Combined @ 3.86 p/s\n", "Processed 1 pages in 0.34s; Preprocess @ 16.22 p/s, Model @ 3.60 p/s, Combined @ 2.95 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 17.35 p/s, Model @ 3.71 p/s, Combined @ 3.06 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 18.77 p/s, Model @ 3.97 p/s, Combined @ 3.28 p/s\n", "Processed 1 pages in 0.34s; Preprocess @ 13.16 p/s, Model @ 3.85 p/s, Combined @ 2.98 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 14.79 p/s, Model @ 4.00 p/s, Combined @ 3.15 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 16.13 p/s, Model @ 3.96 p/s, Combined @ 3.18 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 13.91 p/s, Model @ 3.96 p/s, Combined @ 3.08 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 13.92 p/s, Model @ 3.89 p/s, Combined @ 3.04 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 16.35 p/s, Model @ 4.02 p/s, Combined @ 3.22 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 14.15 p/s, Model @ 3.93 p/s, Combined @ 3.08 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 16.76 p/s, Model @ 4.02 p/s, Combined @ 3.24 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 14.91 p/s, Model @ 3.91 p/s, Combined @ 3.10 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 17.55 p/s, Model @ 3.91 p/s, Combined @ 3.20 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 16.59 p/s, Model @ 3.96 p/s, Combined @ 3.20 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 16.46 p/s, Model @ 3.99 p/s, Combined @ 3.21 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 15.76 p/s, Model @ 3.98 p/s, Combined @ 3.18 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 17.23 p/s, Model @ 3.89 p/s, Combined @ 3.17 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 16.74 p/s, Model @ 3.83 p/s, Combined @ 3.12 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 16.31 p/s, Model @ 3.96 p/s, Combined @ 3.19 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 15.65 p/s, Model @ 3.90 p/s, Combined @ 3.12 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 17.00 p/s, Model @ 3.91 p/s, Combined @ 3.18 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 17.15 p/s, Model @ 3.96 p/s, Combined @ 3.22 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 18.16 p/s, Model @ 3.94 p/s, Combined @ 3.23 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 17.85 p/s, Model @ 3.96 p/s, Combined @ 3.24 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 16.08 p/s, Model @ 3.97 p/s, Combined @ 3.19 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 14.34 p/s, Model @ 3.82 p/s, Combined @ 3.01 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 16.00 p/s, Model @ 3.99 p/s, Combined @ 3.19 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 15.09 p/s, Model @ 3.90 p/s, Combined @ 3.10 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 13.95 p/s, Model @ 3.91 p/s, Combined @ 3.05 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 14.24 p/s, Model @ 3.83 p/s, Combined @ 3.02 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 15.65 p/s, Model @ 3.82 p/s, Combined @ 3.07 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 18.55 p/s, Model @ 3.84 p/s, Combined @ 3.18 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 17.15 p/s, Model @ 3.91 p/s, Combined @ 3.19 p/s\n", "Processed 1 pages in 0.40s; Preprocess @ 16.09 p/s, Model @ 2.97 p/s, Combined @ 2.51 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 16.45 p/s, Model @ 3.88 p/s, Combined @ 3.14 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 13.94 p/s, Model @ 3.96 p/s, Combined @ 3.08 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 16.54 p/s, Model @ 3.88 p/s, Combined @ 3.14 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 15.69 p/s, Model @ 3.93 p/s, Combined @ 3.15 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 16.91 p/s, Model @ 3.80 p/s, Combined @ 3.10 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 15.94 p/s, Model @ 3.90 p/s, Combined @ 3.14 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 14.57 p/s, Model @ 3.88 p/s, Combined @ 3.06 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 15.56 p/s, Model @ 3.96 p/s, Combined @ 3.16 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 15.65 p/s, Model @ 4.01 p/s, Combined @ 3.19 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 15.18 p/s, Model @ 3.96 p/s, Combined @ 3.14 p/s\n", "Processed 1 pages in 0.26s; Preprocess @ 19.89 p/s, Model @ 4.67 p/s, Combined @ 3.78 p/s\n", "Processed 1 pages in 0.38s; Preprocess @ 8.36 p/s, Model @ 3.82 p/s, Combined @ 2.62 p/s\n", "Processed 1 pages in 0.34s; Preprocess @ 12.50 p/s, Model @ 3.90 p/s, Combined @ 2.97 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 14.17 p/s, Model @ 3.87 p/s, Combined @ 3.04 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 13.18 p/s, Model @ 3.93 p/s, Combined @ 3.03 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 14.32 p/s, Model @ 3.91 p/s, Combined @ 3.07 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 18.26 p/s, Model @ 3.98 p/s, Combined @ 3.27 p/s\n", "Processed 1 pages in 0.33s; Preprocess @ 12.93 p/s, Model @ 3.95 p/s, Combined @ 3.02 p/s\n", "Processed 1 pages in 0.38s; Preprocess @ 8.30 p/s, Model @ 3.86 p/s, Combined @ 2.64 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 19.10 p/s, Model @ 3.94 p/s, Combined @ 3.27 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 20.54 p/s, Model @ 3.96 p/s, Combined @ 3.32 p/s\n", "Processed 1 pages in 0.47s; Preprocess @ 4.84 p/s, Model @ 3.78 p/s, Combined @ 2.12 p/s\n", "Processed 1 pages in 0.46s; Preprocess @ 5.14 p/s, Model @ 3.76 p/s, Combined @ 2.17 p/s\n", "Processed 1 pages in 0.37s; Preprocess @ 9.05 p/s, Model @ 3.83 p/s, Combined @ 2.69 p/s\n", "Processed 1 pages in 0.44s; Preprocess @ 5.65 p/s, Model @ 3.80 p/s, Combined @ 2.27 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 18.30 p/s, Model @ 4.01 p/s, Combined @ 3.29 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 17.80 p/s, Model @ 3.99 p/s, Combined @ 3.26 p/s\n", "Processed 1 pages in 0.44s; Preprocess @ 5.54 p/s, Model @ 3.80 p/s, Combined @ 2.25 p/s\n", "Processed 1 pages in 0.43s; Preprocess @ 6.06 p/s, Model @ 3.83 p/s, Combined @ 2.35 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 23.36 p/s, Model @ 4.01 p/s, Combined @ 3.42 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 21.03 p/s, Model @ 3.96 p/s, Combined @ 3.33 p/s\n", "Processed 1 pages in 0.39s; Preprocess @ 7.61 p/s, Model @ 3.89 p/s, Combined @ 2.58 p/s\n", "Processed 1 pages in 0.37s; Preprocess @ 9.43 p/s, Model @ 3.86 p/s, Combined @ 2.74 p/s\n", "Processed 1 pages in 0.38s; Preprocess @ 8.56 p/s, Model @ 3.76 p/s, Combined @ 2.61 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 17.27 p/s, Model @ 4.17 p/s, Combined @ 3.36 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 21.41 p/s, Model @ 4.10 p/s, Combined @ 3.44 p/s\n", "Processed 1 pages in 0.25s; Preprocess @ 25.86 p/s, Model @ 4.79 p/s, Combined @ 4.04 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 18.30 p/s, Model @ 4.23 p/s, Combined @ 3.44 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 18.31 p/s, Model @ 4.22 p/s, Combined @ 3.43 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 15.33 p/s, Model @ 4.15 p/s, Combined @ 3.26 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 28.49 p/s, Model @ 4.30 p/s, Combined @ 3.73 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 16.30 p/s, Model @ 4.23 p/s, Combined @ 3.36 p/s\n", "Processed 1 pages in 0.38s; Preprocess @ 7.68 p/s, Model @ 4.04 p/s, Combined @ 2.65 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 17.05 p/s, Model @ 4.20 p/s, Combined @ 3.37 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 21.14 p/s, Model @ 4.22 p/s, Combined @ 3.52 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 19.45 p/s, Model @ 4.23 p/s, Combined @ 3.48 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 27.42 p/s, Model @ 4.12 p/s, Combined @ 3.58 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 26.06 p/s, Model @ 4.18 p/s, Combined @ 3.61 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 14.05 p/s, Model @ 4.14 p/s, Combined @ 3.20 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 17.09 p/s, Model @ 4.19 p/s, Combined @ 3.37 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 21.56 p/s, Model @ 4.18 p/s, Combined @ 3.50 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 16.07 p/s, Model @ 4.20 p/s, Combined @ 3.33 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 15.48 p/s, Model @ 4.10 p/s, Combined @ 3.24 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 18.15 p/s, Model @ 4.16 p/s, Combined @ 3.38 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 27.90 p/s, Model @ 4.22 p/s, Combined @ 3.66 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 19.53 p/s, Model @ 4.23 p/s, Combined @ 3.48 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 17.30 p/s, Model @ 4.08 p/s, Combined @ 3.30 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 19.98 p/s, Model @ 4.22 p/s, Combined @ 3.48 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 19.05 p/s, Model @ 4.24 p/s, Combined @ 3.47 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 19.47 p/s, Model @ 4.22 p/s, Combined @ 3.47 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 28.00 p/s, Model @ 4.27 p/s, Combined @ 3.70 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 26.46 p/s, Model @ 4.32 p/s, Combined @ 3.71 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 26.07 p/s, Model @ 4.26 p/s, Combined @ 3.66 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 23.10 p/s, Model @ 4.23 p/s, Combined @ 3.58 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 25.87 p/s, Model @ 4.24 p/s, Combined @ 3.64 p/s\n", "Processed 1 pages in 0.35s; Preprocess @ 9.65 p/s, Model @ 4.09 p/s, Combined @ 2.87 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 15.68 p/s, Model @ 4.24 p/s, Combined @ 3.34 p/s\n", "Processed 1 pages in 0.31s; Preprocess @ 14.78 p/s, Model @ 4.14 p/s, Combined @ 3.23 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 25.73 p/s, Model @ 4.27 p/s, Combined @ 3.66 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 20.30 p/s, Model @ 4.29 p/s, Combined @ 3.54 p/s\n", "Processed 1 pages in 0.25s; Preprocess @ 25.02 p/s, Model @ 4.66 p/s, Combined @ 3.93 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 20.30 p/s, Model @ 4.18 p/s, Combined @ 3.46 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 27.28 p/s, Model @ 4.29 p/s, Combined @ 3.70 p/s\n", "Processed 1 pages in 0.28s; Preprocess @ 25.77 p/s, Model @ 4.23 p/s, Combined @ 3.63 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 16.03 p/s, Model @ 4.24 p/s, Combined @ 3.35 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 16.09 p/s, Model @ 4.23 p/s, Combined @ 3.35 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 18.12 p/s, Model @ 4.21 p/s, Combined @ 3.42 p/s\n", "Processed 1 pages in 0.29s; Preprocess @ 20.46 p/s, Model @ 4.15 p/s, Combined @ 3.45 p/s\n", "Processed 1 pages in 0.27s; Preprocess @ 27.33 p/s, Model @ 4.29 p/s, Combined @ 3.71 p/s\n", "Processed 1 pages in 0.32s; Preprocess @ 13.08 p/s, Model @ 4.09 p/s, Combined @ 3.12 p/s\n", "Processed 1 pages in 0.30s; Preprocess @ 16.28 p/s, Model @ 4.25 p/s, Combined @ 3.37 p/s\n", "--------------------\n", "E2E time taken: 132.19 seconds for 275 pages with throughput of 2.08 pages per second\n", "(Preprocess @ 52.25% i.e., Model @ 47.72%)\n" ] } ], "source": [ "batch_size = 1\n", "outputs = []\n", "st = time.perf_counter()\n", "total_preprocess_time = 0\n", "total_model_time = 0\n", "\n", "for i in range(0, len(images), batch_size):\n", " batch = images[i:i+batch_size]\n", " t0 = time.perf_counter()\n", " messages = [make_ocr_message(image) for image in batch]\n", " t1 = time.perf_counter()\n", " outputs = llm.chat(messages, sampling_params, use_tqdm=False)\n", " t2 = time.perf_counter()\n", "\n", " preprocess_time = t1 - t0\n", " model_time = t2 - t1\n", " total_time = t2 - t0\n", " total_preprocess_time += preprocess_time\n", " total_model_time += model_time\n", " preprocess_throughput = len(batch) / preprocess_time\n", " model_throughput = len(batch) / model_time\n", " total_throughput = len(batch) / total_time\n", " print(f\"Processed {len(batch)} pages in {total_time:.2f}s; Preprocess @ {preprocess_throughput:.2f} p/s, Model @ {model_throughput:.2f} p/s, Combined @ {total_throughput:.2f} p/s\")\n", " outputs.extend(outputs)\n", "total_time_taken = time.perf_counter() - st\n", "print('--' * 10)\n", "print(f\"E2E time taken: {total_time_taken:.2f} seconds for {len(images)} pages with throughput of {(len(images) / total_time_taken):.2f} pages per second\")\n", "print(f\"(Preprocess @ {total_preprocess_time*100/total_time_taken:.2f}% i.e., Model @ {total_model_time*100/total_time_taken:.2f}%)\")" ] }, { "cell_type": "code", "execution_count": 39, "id": "357b749a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Processed 16 pages in 3.95s; Preprocess @ 8.23 p/s, Model @ 7.96 p/s, Combined @ 4.05 p/s\n", "Processed 16 pages in 3.64s; Preprocess @ 9.40 p/s, Model @ 8.25 p/s, Combined @ 4.40 p/s\n", "Processed 16 pages in 7.25s; Preprocess @ 3.14 p/s, Model @ 7.40 p/s, Combined @ 2.21 p/s\n", "Processed 16 pages in 10.22s; Preprocess @ 2.02 p/s, Model @ 6.93 p/s, Combined @ 1.57 p/s\n", "Processed 16 pages in 10.29s; Preprocess @ 2.00 p/s, Model @ 6.96 p/s, Combined @ 1.56 p/s\n", "Processed 16 pages in 10.15s; Preprocess @ 2.03 p/s, Model @ 7.10 p/s, Combined @ 1.58 p/s\n", "Processed 16 pages in 10.19s; Preprocess @ 2.03 p/s, Model @ 6.98 p/s, Combined @ 1.57 p/s\n", "Processed 16 pages in 10.22s; Preprocess @ 2.03 p/s, Model @ 6.88 p/s, Combined @ 1.56 p/s\n", "Processed 16 pages in 10.14s; Preprocess @ 2.04 p/s, Model @ 7.01 p/s, Combined @ 1.58 p/s\n", "Processed 16 pages in 8.04s; Preprocess @ 2.76 p/s, Model @ 7.16 p/s, Combined @ 1.99 p/s\n", "Processed 16 pages in 3.09s; Preprocess @ 15.93 p/s, Model @ 7.67 p/s, Combined @ 5.18 p/s\n", "Processed 16 pages in 3.34s; Preprocess @ 15.49 p/s, Model @ 6.95 p/s, Combined @ 4.80 p/s\n", "Processed 16 pages in 3.15s; Preprocess @ 14.52 p/s, Model @ 7.82 p/s, Combined @ 5.08 p/s\n", "Processed 16 pages in 3.74s; Preprocess @ 9.54 p/s, Model @ 7.75 p/s, Combined @ 4.28 p/s\n", "Processed 16 pages in 3.06s; Preprocess @ 14.56 p/s, Model @ 8.16 p/s, Combined @ 5.23 p/s\n", "Processed 16 pages in 2.68s; Preprocess @ 19.36 p/s, Model @ 8.62 p/s, Combined @ 5.96 p/s\n", "Processed 16 pages in 2.60s; Preprocess @ 19.21 p/s, Model @ 9.03 p/s, Combined @ 6.14 p/s\n", "Processed 3 pages in 0.62s; Preprocess @ 17.18 p/s, Model @ 6.72 p/s, Combined @ 4.83 p/s\n", "--------------------\n", "E2E time taken: 106.38 seconds for 275 pages with throughput of 2.58 pages per second\n", "(Preprocess @ 65.55% i.e., Model @ 34.44%)\n" ] } ], "source": [ "batch_size = 16\n", "outputs = []\n", "st = time.perf_counter()\n", "total_preprocess_time = 0\n", "total_model_time = 0\n", "\n", "for i in range(0, len(images), batch_size):\n", " batch = images[i:i+batch_size]\n", " t0 = time.perf_counter()\n", " messages = [make_ocr_message(image) for image in batch]\n", " t1 = time.perf_counter()\n", " outputs = llm.chat(messages, sampling_params, use_tqdm=False)\n", " t2 = time.perf_counter()\n", "\n", " preprocess_time = t1 - t0\n", " model_time = t2 - t1\n", " total_time = t2 - t0\n", " total_preprocess_time += preprocess_time\n", " total_model_time += model_time\n", " preprocess_throughput = len(batch) / preprocess_time\n", " model_throughput = len(batch) / model_time\n", " total_throughput = len(batch) / total_time\n", " print(f\"Processed {len(batch)} pages in {total_time:.2f}s; Preprocess @ {preprocess_throughput:.2f} p/s, Model @ {model_throughput:.2f} p/s, Combined @ {total_throughput:.2f} p/s\")\n", " outputs.extend(outputs)\n", "total_time_taken = time.perf_counter() - st\n", "print('--' * 10)\n", "print(f\"E2E time taken: {total_time_taken:.2f} seconds for {len(images)} pages with throughput of {(len(images) / total_time_taken):.2f} pages per second\")\n", "print(f\"(Preprocess @ {total_preprocess_time*100/total_time_taken:.2f}% i.e., Model @ {total_model_time*100/total_time_taken:.2f}%)\")" ] }, { "cell_type": "markdown", "id": "00ec73a1", "metadata": {}, "source": [ "## No Batching but using `make_ocr_message`" ] }, { "cell_type": "code", "execution_count": 57, "id": "590cb264", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total time: 107.11 seconds with throughput of 2.57 p/s\n", "Preprocess throughput: 3.97 p/s (64.75%)\n", "Model throughput: 7.28 p/s (35.25%)\n" ] } ], "source": [ "t0 = time.perf_counter()\n", "messages = [make_ocr_message(image) for image in images]\n", "t1 = time.perf_counter()\n", "outputs_base64_image = llm.chat(messages, sampling_params, use_tqdm=False)\n", "t2 = time.perf_counter()\n", "\n", "preprocess_time = t1 - t0\n", "model_time = t2 - t1\n", "total_time = t2 - t0\n", "preprocess_throughput = len(messages) / preprocess_time\n", "model_throughput = len(messages) / model_time\n", "total_throughput = len(messages) / total_time\n", "\n", "print(f\"Total time: {total_time:.2f} seconds with throughput of {total_throughput:.2f} p/s\")\n", "print(f\"Preprocess throughput: {preprocess_throughput:.2f} p/s ({(preprocess_time*100/total_time):.2f}%)\")\n", "print(f\"Model throughput: {model_throughput:.2f} p/s ({(model_time*100/total_time):.2f}%)\")" ] }, { "cell_type": "code", "execution_count": null, "id": "5cd485b1", "metadata": {}, "outputs": [], "source": [ "t0 = time.perf_counter()\n", "messages = [make_ocr_message(image) for image in images]\n", "t1 = time.perf_counter()\n", "outputs_base64_image = llm.chat(messages, sampling_params, use_tqdm=False)\n", "t2 = time.perf_counter()\n", "\n", "preprocess_time = t1 - t0\n", "model_time = t2 - t1\n", "total_time = t2 - t0\n", "preprocess_throughput = len(messages) / preprocess_time\n", "model_throughput = len(messages) / model_time\n", "total_throughput = len(messages) / total_time\n", "\n", "print(f\"Total time: {total_time:.2f} seconds with throughput of {total_throughput:.2f} p/s\")\n", "print(f\"Preprocess throughput: {preprocess_throughput:.2f} p/s ({(preprocess_time*100/total_time):.2f}%)\")\n", "print(f\"Model throughput: {model_throughput:.2f} p/s ({(model_time*100/total_time):.2f}%)\")" ] }, { "cell_type": "markdown", "id": "7a45941e", "metadata": {}, "source": [ "# Pass Image directly to model" ] }, { "cell_type": "code", "execution_count": 50, "id": "ff844f4f", "metadata": {}, "outputs": [], "source": [ "def process_single_image(image):\n", " \"\"\"single image\"\"\"\n", " prompt_in = \"\\n<|grounding|>Convert the document to markdown. \"\n", " cache_item = {\n", " \"prompt\": prompt_in,\n", " \"multi_modal_data\": {\"image\": image},\n", " }\n", " return cache_item\n" ] }, { "cell_type": "code", "execution_count": 56, "id": "266900db", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total time: 24.48 seconds with throughput of 11.23 p/s\n" ] } ], "source": [ "t0 = time.perf_counter()\n", "outputs_direct_image = llm.generate(messages, sampling_params, use_tqdm=False)\n", "t2 = time.perf_counter()\n", "\n", "preprocess_time = t1 - t0\n", "model_time = t2 - t1\n", "total_time = t2 - t0\n", "preprocess_throughput = len(messages) / preprocess_time\n", "model_throughput = len(messages) / model_time\n", "total_throughput = len(messages) / total_time\n", "\n", "print(f\"Total time: {total_time:.2f} seconds with throughput of {total_throughput:.2f} p/s\")" ] }, { "cell_type": "markdown", "id": "7b5f71d4", "metadata": {}, "source": [ "## Compare outputs" ] }, { "cell_type": "code", "execution_count": 66, "id": "bdca0c8f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6th INTERNATIONAL YOGA DAY (21-06-2020) --------- 6th INTERNATIONAL YOGA DAY (21- 06- 2020\n", "3/15/21\n", "\n", "image[[105, 115, --------- 3/15/21 \n", "\n", "image[[103, 115,\n", "0. Preamble\n", "\n", "text[[217, 161, --------- 0. Preamble \n", "\n", "image[[195, 152,\n", "3/15/21\n", "\n", "image[[105, 112, --------- 3/15/21 \n", "\n", "image[[104, 115,\n", "1. Problem\n", "\n", "image[[220, 170, 760 --------- 1. Problem \n", "\n", "image[[220, 170, 765\n", "3/15/21\n", "\n", "image[[105, 115, --------- 3/15/21 \n", "\n", "image[[104, 115,\n", "3/15/21\n", "\n", "title[[144, 140, --------- 3/15/21 \n", "\n", "text[[144, 141,\n", "3/15/21\n", "\n", "title[[144, 135, --------- 1. Problem \n", "\n", "sub_title[[144, 137,\n", "3/15/21\n", "\n", "title[[144, 135, --------- 3/15/21 \n", "\n", "sub_title[[144, 137\n", "3-day community workshop June 15-17 Contact me if interested \n", "\n", "text --------- 3/15/21 \n", "\n", "text[[144, 238,\n", "3/15/21\n", "\n", "text[[144, 137, --------- 3/15/21 \n", "\n", "image[[150, 180,\n", "3/15/21\n", "\n", "title[[145, 137, --------- 11. V/E/M \n", "\n", "table[[144, 167,\n", "3/15/21\n", "\n", "image[[105, 115, --------- 3/15/21 \n", "\n", "image[[102, 117,\n", "3/15/21\n", "\n", "text[[142, 137, --------- 3/15/21 \n", "\n", "image[[140, 170,\n", "3/15/21\n", "\n", "text[[144, 135, --------- 3/15/21 \n", "\n", "image[[104, 115,\n", "3/15/21\n", "\n", "text[[144, 131, --------- 3/15/21 \n", "\n", "image[[101, 115,\n", "3/15/21\n", "\n", "text[[144, 137, --------- 3/15/21 \n", "\n", "text[[144, 138,\n", "0.05\n", "\n", "image[[150, 175, 510 --------- 3/15/21 \n", "\n", "image[[112, 170,\n", "3/15/21\n", "\n", "image[[110, 117, --------- 3/15/21 \n", "\n", "image[[105, 115,\n", "3/15/21\n", "\n", "text[[824, 118, --------- 3/15/21 \n", "\n", "sub_title[[144, 148\n", "0.00\n", "\n", "text[[80, 98, 760 --------- For office use only Username: Password: Units: Web\n", "0.00 (plus VAT where applicable). This figure is reviewed annually by the --------- Note: Should ownership of the company signing this application form transfer to another company by\n", "2021\n", "\n", "image[[203, 110, 792, --------- 1 \n", "\n", "image[[201, 110, 792,\n", "1: 1: 1: 1: 1: 1 --------- 3 \n", "\n", "image[[202, 110, 792,\n", "12 Magistrates: 4\n", "\n", "text[[238, 220 --------- 5 \n", "\n", "sub_title[[247, 170, 490,\n", "7-25-1. Annual tax levy for construction or improvement of certain buildings --------- 7 \n", "\n", "sub_title[[270, 137, 727,\n", "2000-2019 (More employment/commercial activity → civil litigation, small --------- 9 \n", "\n", "image[[200, 110, 795,\n", "56,735\n", "\n", "text[[250, 137, 716 --------- 11 \n", "\n", "image[[212, 560, 794,\n", "500\n", "\n", "image[[260, 160, 767, --------- 13 \n", "\n", "image[[255, 163, 768,\n", "15\n", "\n", "image[[202, 110, 790, --------- 15 \n", "\n", "image[[201, 110, 790,\n", "77% increase, FY13 to FY20\n", "\n", "image[[245 --------- 17 \n", "\n", "image[[201, 110, 794,\n", "19\n", "\n", "image[[202, 108, 792, --------- 19 \n", "\n", "image[[201, 110, 792,\n", "21\n", "\n", "image[[202, 108, 792, --------- 21 \n", "\n", "image[[202, 545, 792,\n", "23\n", "\n", "image[[202, 108, 792, --------- 23 \n", "\n", "image[[202, 550, 794,\n", "6 courtrooms/hearing rooms, including two jury trial courtrooms and two smaller --------- 25 \n", "\n", "table[[300, 180, 699,\n", "Ten Year Projected Need\n", "\n", "text[[235, 188, --------- 27 \n", "\n", "sub_title[[260, 135, 744,\n", "29\n", "\n", "table[[245, 170, 749, --------- 29 \n", "\n", "table[[245, 173, 750,\n", "1. Property Compliance Specialist 2. Reports to: Director of Housing 3 --------- Title: Property Compliance Specialist Reports to: Director of Housing Department: Housing\n", "Must have two years' nonprofit experience, demonstrated ability to work effectively with diverse populations --------- 1 \n", "\n", "text[[173, 175, 840,\n", "1. 1. 1. 1. 1. 1 --------- ANNUAL FINANCIAL REPORT \n", "\n", "text[[420,\n", "********* FINANCIAL SECTION *****\n", "\n", "text[[68, --------- Financial Information for the following funds and account groups are included in the Annual Financial Report\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[91,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- (A) GENERAL Balance Sheet Code Description 2010 Edp\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[91,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[95, --------- 2010 EdpCode 2011 \n", "\n", "text[[95,\n", "2010 EdpCode 2011\n", "\n", "text[[82, --------- 2010 EdpCode 2011 \n", "\n", "text[[82,\n", "2010 EdpCode 2011\n", "\n", "text[[90, --------- 2010 EdpCode 2011 \n", "\n", "text[[90,\n", "2011\n", "\n", "text[[80, 118, 168, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2011 EdpCode 2012\n", "\n", "text[[92, --------- (A) GENERAL \n", "\n", "table[[88, 160,\n", "2011 EdpCode 2012\n", "\n", "text[[84, --------- 2011 EdpCode 2012 \n", "\n", "text[[84,\n", "2010 EdpCode 2011\n", "\n", "text[[86, --------- 2010 EdpCode 2011 \n", "\n", "text[[86,\n", "2011\n", "\n", "text[[85, 118, 301, --------- 2010 EdpCode 2011 \n", "\n", "text[[85,\n", "2010 EdpCode 2011\n", "\n", "text[[88, --------- 2010 EdpCode 2011 \n", "\n", "text[[88,\n", "2010 EdpCode 2011\n", "\n", "text[[85, --------- (B) GENERAL TOWN-OUTSIDE VG \n", "\n", "text\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[93,\n", "2011 EdpCode 2012\n", "\n", "text[[75, --------- (B) GENERAL TOWN-OUTSIDE VG \n", "\n", "table\n", "2011 EdpCode 2012\n", "\n", "text[[94, --------- 2011 EdpCode 2012 \n", "\n", "text[[92,\n", "2010 EdpCode 2011\n", "\n", "text[[88, --------- 2010 EdpCode 2011 \n", "\n", "text[[88,\n", "2010 EdpCode 2011\n", "\n", "text[[90, --------- 2010 EdpCode 2011 \n", "\n", "text[[90,\n", "2010 EdpCode 2011\n", "\n", "text[[82, --------- 2010 EdpCode 2011 \n", "\n", "text[[84,\n", "2010 EdpCode 2011\n", "\n", "text[[95, --------- 2010 EdpCode 2011 \n", "\n", "text[[95,\n", "2010 EdpCode 2011\n", "\n", "text[[73, --------- 2010 EdpCode 2011 \n", "\n", "text[[72,\n", "2010 EdpCode 2011\n", "\n", "text[[90, --------- 2010 EdpCode 2011 \n", "\n", "text[[88,\n", "2010 EdpCode 2011\n", "\n", "text[[81, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[95, --------- 2010 EdpCode 2011 \n", "\n", "text[[95,\n", "2010 EdpCode 2011\n", "\n", "text[[76, --------- 2010 EdpCode 2011 \n", "\n", "text[[75,\n", "2011 EdpCode 2012\n", "\n", "text[[73, --------- 2011 EdpCode 2012 \n", "\n", "text[[72,\n", "2011 EdpCode 2012\n", "\n", "text[[92, --------- (DA) HIGHWAY-TOWN- WIDE \n", "\n", "table\n", "2010 EdpCode 2011\n", "\n", "text[[84, --------- 2010 EdpCode 2011 \n", "\n", "text[[84,\n", "2010 EdpCode 2011\n", "\n", "text[[95, --------- 2010 EdpCode 2011 \n", "\n", "text[[97,\n", "2010 EdpCode 2011\n", "\n", "text[[78, --------- 2010 EdpCode 2011 \n", "\n", "text[[78,\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- (DB) HIGHWAY-PART-TOWN \n", "\n", "text[[\n", "2010 EdpCode 2011\n", "\n", "text[[81, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2011 EdpCode 2012\n", "\n", "text[[94, --------- 2011 EdpCode 2012 \n", "\n", "text[[92,\n", "2011 EdpCode 2012\n", "\n", "text[[78, --------- 2011 EdpCode 2012 \n", "\n", "text[[78,\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[90,\n", "2010 EdpCode 2011\n", "\n", "text[[78, --------- 2010 EdpCode 2011 \n", "\n", "text[[78,\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[92,\n", "2010 EdpCode 2011\n", "\n", "text[[75, --------- 2010 EdpCode 2011 \n", "\n", "text[[75,\n", "2010 EdpCode 2011 \n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[93,\n", "2010 EdpCode 2011\n", "\n", "text[[90, --------- 2010 EdpCode 2011 \n", "\n", "text[[90,\n", "2010 EdpCode 2011\n", "\n", "text[[98, --------- 2010 EdpCode 2011 \n", "\n", "text[[100,\n", "2010 EdpCode 2011\n", "\n", "text[[90, --------- 2010 EdpCode 2011 \n", "\n", "text[[90,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- (H) CAPITAL PROJECTS \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[97, --------- 2010 EdpCode 2011 \n", "\n", "text[[97,\n", "2010 EdpCode 2011\n", "\n", "text[[75, --------- (K) GENERAL FIXED ASSETS \n", "\n", "text[[75\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[92,\n", "2010 EdpCode 2011\n", "\n", "text[[72, --------- 2010 EdpCode 2011 \n", "\n", "text[[72,\n", "2010 EdpCode 2011\n", "\n", "text[[98, --------- 2010 EdpCode 2011 \n", "\n", "text[[97,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[78,\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[92,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[95, --------- 2010 EdpCode 2011 \n", "\n", "text[[95,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[92,\n", "2011 EdpCode 2012 \n", "\n", "text[[80, --------- (SS) SEWER \n", "\n", "table[[75, 160\n", "2011 EdpCode 2012\n", "\n", "text[[100, --------- 2011 EdpCode 2012 \n", "\n", "text[[98,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[97, --------- 2010 EdpCode 2011 \n", "\n", "text[[100,\n", "2010 EdpCode 2011\n", "\n", "text[[82, --------- 2010 EdpCode 2011 \n", "\n", "text[[81,\n", "2010 EdpCode 2011\n", "\n", "text[[90, --------- 2010 EdpCode 2011 \n", "\n", "text[[90,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2011 EdpCode 2012 Estimated Revenues Est Rev - Real Property --------- 2011 EdpCode 2012 \n", "\n", "text[[88,\n", "2011 EdpCode 2012\n", "\n", "text[[80, --------- 2011 EdpCode 2012 \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[90, --------- 2010 EdpCode 2011 \n", "\n", "text[[88,\n", "2010 EdpCode 2011\n", "\n", "text[[95, --------- 2010 EdpCode 2011 \n", "\n", "text[[98,\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[90,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[88, --------- 2010 EdpCode 2011 \n", "\n", "text[[88,\n", "2010 EdpCode 2011\n", "\n", "text[[95, --------- 2010 EdpCode 2011 \n", "\n", "text[[95,\n", "2010 EdpCode 2011\n", "\n", "text[[92, --------- 2010 EdpCode 2011 \n", "\n", "text[[95,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "2010 EdpCode 2011\n", "\n", "text[[85, --------- 2010 EdpCode 2011 \n", "\n", "text[[85,\n", "2010 EdpCode 2011\n", "\n", "text[[80, --------- 2010 EdpCode 2011 \n", "\n", "text[[80,\n", "1. GENERAL Adjustment Reason Account Code A8015 To adjust prior year error ( --------- A) GENERAL Adjustment Reason Account Code A8015 To adjust prior year error\n", "0.00000004 8/13/1999 1.250 --------- Water and Other Purposes Exempt From Constitutional Debt Limit \n", "\n", "table\n", "0\n", "\n", "table[[45, 135, 732, --------- Water and Other Purposes Exempt From Constitutional Debt Limit \n", "\n", "\n", "2008000001\n", "\n", "table[[45, 135, --------- Indebtedness Not Exempt From Constitutional Debt Limit \n", "\n", "table\n", "2011000001\n", "\n", "table[[48, 135, --------- Indebtedness Not Exempt From Constitutional Debt Limit \n", "\n", "table\n", "0.00\n", "\n", "text[[45, 111, 465 --------- Indebtedness Not Exempt From Constitutional Debt Limit \n", "\n", "table\n", "9Z2001 9Z2011 9Z2021 9 --------- TOWN OF Pawling Schedule of Time Deposits and Investments For the\n", "0.00 0.00 0.00 0.00 --------- TOWN OF Pawling Bank Reconciliation For the Fiscal Year Ending 201\n", "0.00\n", "\n", "text[[426, 72, 663 --------- 1 \n", "\n", "text[[365, 152, 722,\n", "1) Does your municipality have a written procurement policy? 2) Have the --------- 1) Does your municipality have a written procurement policy? Yes 2) Have\n", "32\n", "\n", "table[[90, 95, 940, --------- 1 \n", "\n", "table[[88, 93, 940,\n", "0.00 0.00 0.00 0.00 --------- 1 \n", "\n", "table[[81, 85, 767,\n", "1. Type of Other Post Employment Benefits Plan 2. Annual Required Contribution( --------- 1. Type of Other Post Employment Benefits Plan \n", "2. Annual Required Contribution(\n", "0.000000000000000000000000000000000000000000 --------- TOWN OF PAWLING, NEW YORK NOTES TO THE FINANCIAL\n", "0.0.0.0.0.0.0.0. --------- NOTE A - Summary of Accounting Policies \n", "\n", "text[[129,\n", "0.0.0.0.0.0.0.0. --------- Summary of Accounting Policies (Continued) \n", "\n", "text[[240,\n", "0.0.0.0.0.0.0.0. --------- Summary of Accounting Policies (Continued) \n", "\n", "text[[250,\n", "0.00\n", "\n", "title[[270, 92, 774 --------- 5. EncumbrancesEncumbrance accounting, under which purchase orders,\n", "2011\n", "\n", "title[[256, 123, 760, --------- 7. Pension Plans \n", "\n", "sub_title[[196, 264,\n", "10. Property TaxesThe Town submits an approved budget to the Dutchess County --------- 10. Property Taxes \n", "\n", "text[[195, 234,\n", "2011 consist of the following: \n", "\n", "table[[131, --------- 1 \n", "\n", "sub_title[[135, 163, 382,\n", "2011\n", "\n", "title[[137, 186, 583, --------- 1. Changes \n", "\n", "text[[137, 299, 884\n", "1. Deficit Fund BalancesThe Town of Pawling Water District No. --------- 1. Deficit Fund Balances \n", "\n", "text[[174,\n", "1. LitigationThe Town is a defendant in various lawsuits and tax certior --------- 1. Litigation \n", "\n", "text[[128, 470,\n", "1P0\n", "\n", "text[[358, 172, 961 --------- Minutes of the meeting of Policy, Resources & Finance Committee of Yeovil Town\n", "2020. \n", "\n", "sub_title[[95, 87, --------- 2020. \n", "\n", "sub_title[[94, 90,\n", "0 to recommend the agreed budget and precept to Town Council for adoption. --------- and (2) to recommend the agreed budget and precept to Town Council for\n", "10/163 WEBSITE UPDATE\n", "\n", "text[[92, --------- 10/163 WEBSITE UPDATE \n", "\n", "text[[92,\n", "7:53pm – The member of the press left the meeting and did not --------- 7:53pm - The member of the press left the meeting and did not\n", "title[[170, 360, 825, 525]] --------- Report on Barriers to Reduce and Reuse Behaviours \n", "\n", "text\n", "2021\n", "\n", "title[[40, 123, 498, --------- Research project undertaken May 2021 Queenstown Lakes and Central Otago districts\n", "3\n", "\n", "text[[60, 214, 857, --------- 3 \n", "\n", "text[[60, 213, 858,\n", "0.5cm\n", "\n", "sub_title[[40, 105, --------- 1 \n", "\n", "sub_title[[42, 101, 226,\n", "For people to adopt reduce and reuse behaviours such as using a reusable cup for take --------- 1 \n", "\n", "sub_title[[52, 174, 309,\n", "The main reasons that people do not use their own cups/bags/containers --------- - The main reasons that people do not use their own cups/bags/contain\n", "1,003 participants in total, representing 1.4% of the total --------- 1,003 participants in total, representing \\(1.4\\%\\) of the total\n", "0-24 years old \n", "\n", "image[[77, 170, --------- What is your age? \n", "\n", "image[[77, 170,\n", "0.0.0.0.0.0.0.0. --------- 1 \n", "\n", "sub_title[[40, 130, 467,\n", "0 \"Covid has impacted income so cost is important. Covid also impacts hygiene --------- 1 Most people do not take their own containers for takeaways or deli products,\n", "1 1 1 1 1 1 1 1 --------- 1 \n", "\n", "sub_title[[40, 123, 400,\n", "2019 2021\n", "\n", "image[[48, 103, --------- 100% Always Usually Sometimes Once in a while Thinking of doing Never and unlikely to\n", "2019 2021\n", "\n", "image[[45, 99, --------- 100% Always Usually Sometimes Once in a while Thinking of doing Never and unlikely to\n", "2019 2021\n", "\n", "image[[72, 95, --------- 2019 2021 \n", "\n", "image[[72, 95,\n", "0% 20% 40% 60% 80% 100 --------- 100% 90% 80% 70% 60% 50\n", "0\n", "\n", "image[[70, 94, 870, --------- 0 0 0 0 0 0 0 0\n", "0% 10% 20% 30% 40% 50 --------- 100% 90% 80% 70% 60% 50\n", "0% 20% 40% 60% 80% 100 --------- 100% 90% 80% 70% 60% 50\n", "0 \"I much prefer to have my food supplied in their plastic or cardboard dishes --------- - With regard to taking containers for takeaways and deli items, most people are\n", "2021\n", "\n", "image[[50, 99, 890, --------- 100% 90% 80% 70% 60% 50\n", "2019 2021\n", "\n", "image[[58, 144, --------- 100% 90% 80% 70% 60% 50\n", "0% 0% 0% 0% 0% 0 --------- 100% 90% 80% 70% 60% 50\n", "For repair and secondhand options, the barriers are again more complex than with re --------- - For repair and secondhand options, the barriers are again more complex than with\n", "65% of 35-44 year olds always or usually use a reusable cup --------- 1 \n", "\n", "sub_title[[43, 123, 325,\n", "0.00 0.00 0.00 0.00 --------- 1 \n", "\n", "sub_title[[42, 101, 312,\n", "0% \"Never and Unlikely To\"\n", "\n", "image[[48, --------- How often do you... % \"Never and Unlikely To\" \n", "\n", "image\n", "For this question around behaviours to reduce waste, we did not ask what prevented people --------- 1 \n", "\n", "sub_title[[41, 130, 193,\n", "0.0.0.0.0.0.0.0. --------- Other behaviours - across the regions \n", "\n", "text[[65, 162\n", "6 extremely concerned\n", "\n", "text[[60, 175, 730 --------- 1. Plastic in our oceans \n", "2. Amount of single-use packaging being used\n", "0.0.0.0.0.0.0.0. --------- - There is no distinct difference from the 2019 results for this question,\n", "0% 20% 40% 60% 80% 100 --------- How concerned are you about... \n", "\n", "image[[48, 150\n", "100% 90% 80% 70% 60% 50 --------- Very or extremely concerned about % of total by region \n", "\n", "image[[\n", "0-24 25-34 35-44 45-54 --------- Does age affect our concern for different environmental issues? \n", "\n", "text[[\n", "0.00 0.00 0.00 0.00 --------- Does income affect our level of concern? \n", "\n", "text[[102,\n", "1. Healthy choices2. Impact on environment and our oceans3. Minimal packaging --------- Factors that influence our consumer choices \n", "\n", "text[[40, 174\n", "When asked about other influences, the common factors mentioned were: \n", "\n", "text --------- - When asked about other influences, the common factors mentioned were: \n", "\n", "text\n", "0% of total\n", "\n", "image[[58, 140, --------- 100% 90% 80% 70% 60% 50\n", "0% of total by region\n", "\n", "image[[55, 140 --------- 100% 80% 60% 40% 20% 0\n", "79% of respondents are interested in learning more to help them reduce their waste. --------- Learning more about reducing waste \n", "\n", "text[[40, 174,\n", "0\n", "\n", "text[[60, 97, 599, --------- When asked what specific information they would like to learn about, the top topics were\n", "0\n", "\n", "title[[39, 133, 689, --------- 1 \n", "\n", "sub_title[[43, 130, 689,\n", "1s there anything else you would like to tell us?\" \n", "\n", "text --------- 1 \n", "\n", "sub_title[[40, 130, 350,\n", "0: 0: 0: 0: 0: 0 --------- Future opportunities and challenges \n", "\n", "text[[65, 144,\n", "Wastebusters: to zero waste and beyond... \n", "\n", "image --------- Wastebusters: to zero waste and beyond... \n", "\n", "image\n", "2020 --------- What is Our ‘New Normal’ Like? \n", "\n", "text[[188\n", "image[[42, 168, 323, 812]] --------- image[[42, 164, 324, 816]]\n", "image[[44, 170, 323, 802]] --------- WEBINAR \n", "\n", "title[[355, 530, 770\n", "1\n", "\n", "title[[135, 212, 799, --------- Understanding a New Normal: Thinking Through Process \n", "\n", "text[[248,\n", "0 20 40 60\n", "\n", "image_caption[[150 --------- Need to Understand Local Human Psyche Impacting New Normal... \n", "\n", "image\n", "Does your business have an online sales component? \n", "\n", "image[[150 --------- Need to Understand Business Needs and Issues Impacting New Normal... \n", "\n", "image\n", "1/3 workforce working remotely \n", "\n", "text[[170, 555 --------- Need to Understand Trends Impacting New Normal \n", "\n", "text[[133,\n", "0.5\n", "\n", "title[[133, 212, 867 --------- Summary: Be Thoughtful in Recovery – No Time to Simply Follow – Be Strategic\n", "image[[43, 170, 323, 818]] --------- image[[44, 168, 323, 820]]\n", "title[[135, 205, 819, 266]] --------- The Facts – City of Birmingham \n", "\n", "text[[75, 295\n", "Mayor Randall L. Woodfin \n", "\n", "text[[123, 355 --------- 1 \n", "\n", "sub_title[[144, 205, 830,\n", "0.00\n", "\n", "image[[88, 220, 905 --------- Economic Crisis \n", "\n", "image[[87, 270, 905,\n", "2020\n", "\n", "text[[80, 380, 300, --------- image[[42, 167, 310, 380]]\n", "image[[45, 171, 323, 825]] --------- image[[45, 170, 325, 825]]\n", "image[[83, 170, 380, 728]] --------- image[[81, 170, 380, 730]]\n", "image[[78, 310, 308, 640]] --------- Revenue Impacts of COVID 19 \n", "\n", "image[[345, 305\n", "image[[150, 188, 820, 727]] --------- SOURCE OF AUDIENCE, PRE-COVID \n", "\n", "image[[305\n", "image[[71, 200, 472, 575]] --------- image[[68, 200, 473, 581]]\n", "0:00:00\n", "\n", "image[[66, 198, --------- image[[66, 197, 380, 490]]\n", "0.0\n", "\n", "title[[270, 373, 716 --------- PANEL DISCUSSION \n", "\n", "text[[290, 506, 697\n", "title[[87, 371, 900, 430]] --------- AudeNCE QUESTION AND ANSWER \n", "\n", "text[[231,\n", "title[[135, 222, 415, 285]] --------- sub_title[[134, 222, 416, 285\n", "Upcoming Webinars \n", "\n", "text[[135, 421, --------- sub_title[[135, 209, 454, 272\n", "image[[42, 170, 323, 810]] --------- image[[42, 169, 325, 816]]\n", "2019: “an important question—which no one seems interested in addressing— --------- text[[15, 20, 149, 55]]\n", "1. KREDITO SVARBA --------- I. KREDITO SVARBA\n", "2017.01.11 www.ekonomika.org 4 --------- sub_title[[336, 10, 657, 70\n", "2013): “analysts have found that over long periods of time there has --------- sub_title[[55, 110, 612, 157\n", "2000), Essays on the Great Depression, p. 24, on Irving --------- Bernanke (2000), Essays on the Great Depression, p. 24\n", "1. KREDITAS KREDITUI NELYGUS --------- title[[119, 356, 875, 580]]\n", "30 sk. “Money-Capital and Real Capital” iškryė: --------- - **Marx’as (Capital, 30 sk. “Money-Capital\n", "ší funkciné kredito krypčiu diferencijacija ší --------- sub_title[[265, 12, 728, 70\n", "2011. 11. 1. 2011. 11. --------- sub_title[[55, 115, 930, 225\n", "Richard Werner, Michael Hupson, Dirk Bezemer, Thomas Pal --------- sub_title[[264, 10, 729, 70\n", "‡ˆ‰Š‹ŒŽ --------- sub_title[[264, 10, 728, 68\n", "(GERO/BLOGO) KREDITO POVEIKIS --------- title[[180, 352, 816, 580]]\n", "2017.01.11 www.ekonomika.org \n", "\n", "image --------- image[[12, 230, 985, 515]]\n", "\u0007Kreditas gamybai vystyti koreliuo --------- sub_title[[180, 10, 810, 75\n", "1. jmonė skolinasi investicijoms, algoms ir --------- sub_title[[64, 6, 930, 72\n", "http://voxeu.org/article/great-mortgaging \n", "\n", "image --------- \"The great mortgaging\" \n", "\n", "text[[57, 250\n", "\u0007Kreditas vartojimui: \n", "\n", "text[[ --------- sub_title[[63, 11, 930, 75\n", "2012): \n", "\n", "text[[110, 150, 960 --------- Kapitalo pelno „ekonomika“ \n", "\n", "sub_title[[\n", "2012): \n", "\n", "text[[108, 183, 936 --------- „ Bezemer (2012): \n", "\n", "text[[108\n", "1. 2. 3. 4. 5. 6 --------- title[[77, 355, 914, 580]]\n", "Taigi, bendrojo kredito augimo poveikio e ---------  Taigi, bendrojo kredito augimo pove\n", " Regresija: \n", "\n", "equation[[157, --------- sub_title[[70, 6, 925, 75\n", "2000): volatility of growth tends to decrease and then increase with increasing financial depth --------- - Easterly et al (2000): volatility of growth tends to decrease and\n", "2011), 'Too Much Finance?', www.imf.org/external/p --------- sub_title[[70, 9, 921, 75\n", "2011) 'The Real Effects of Debt', http://www.bis.org --------- - Cecchetti et al (2011) ‘The Real Effects of Debt\n", "0.0\n", "\n", "title[[120, 352, 875 --------- title[[120, 357, 872, 580]]\n", "2017.01.11 www.ekonomika.org 28 --------- image[[20, 180, 976, 400]]\n", "2017.01.11 www.ekonomika.org 29 --------- sub_title[[328, 10, 670, 70\n", "ikkij buma/reformos/bu back \n", "\n", "image --------- title[[10, 8, 988, 78]]\n", "būsto kainų bumas\n", "\n", "text[[102, --------- title[[180, 10, 820, 72]]\n", "2014 m. rugpjūčio 5 d. 10: --------- title[[135, 12, 858, 75]]\n", "0.5inflationavimas\n", "\n", "text[[55, --------- 1 \n", "\n", "text[[55, 321, 975,\n", "text[[58, 108, 970, 247]] --------- - Viral V. Acharya@Tim Eisert@Christian Eufinger,\n", "VI. FINANSIALIZACIJA IR NELYGYBĖ --------- title[[88, 355, 905, 580]]\n", "0.0\n", "\n", "image[[216, 220, 783 --------- title[[241, 12, 753, 75]]\n", "2013, Financialization, Palgrave. --------- image[[331, 160, 728, 730]]\n", "5: the share of bank credit to the FIRE sector and rising inequality, --------- Figure 5: the share of bank credit to the FIRE sector and rising\n", "RENTIER, RENTIER, RENTIER, R --------- 7. RENTA, RENTIER, PALUKANY ESMÉ\n", "0.0\n", "\n", "text[[108, 370, 348 --------- sub_title[[69, 6, 930, 75\n", "title[[175, 10, 825, 78]] --------- sub_title[[175, 10, 822, 75\n", "0.0.0.0.0.0.0.0. --------- - There is, however, a second, much more fundamental inference from our argument\n", "2014: What level of real interest rates is fair? Post Keynesians argue --------- - Koudis (2014): What level of real interest rates is fair\n", "Ulrich’as Bindseil’is (ECB) savo k --------- sub_title[[138, 7, 857, 75\n", "1. 2. 3. 4. 5. 6 --------- sub_title[[15, 345, 978, 440\n", "21 pav. PFI paskolų privačiajam ne finans --------- 20 pav. PFĮ paskolų privačiajam ne finans\n", "Ką matome/jaučiame? Tai, kad LT privatus se --------- sub_title[[54, 110, 925, 195\n", "Mismatch count: 258\n", "Match count: 17\n" ] } ], "source": [ "mismatch_count = 0\n", "match_count = 0\n", "for o1, o2 in zip(outputs_base64_image, outputs_direct_image):\n", " for o1_o, o2_o in zip(o1.outputs, o2.outputs):\n", " if o1_o.text != o2_o.text:\n", " print(o1_o.text.strip(), \" --------- \", o2_o.text.strip())\n", " mismatch_count += 1\n", " else:\n", " match_count += 1\n", " \n", "print(f\"Mismatch count: {mismatch_count}\")\n", "print(f\"Match count: {match_count}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.11" } }, "nbformat": 4, "nbformat_minor": 5 }