#!/usr/env/bin python3 # -*- coding: utf-8 -*- # 读取系统文件用 import sys # base64 编码 import base64 # 网络请求库 import requests # 枚举类型 from enum import Enum # 图像处理 import cv2 # PyQt 框架 from PyQt5 import QtCore from PyQt5.QtGui import QPixmap from PyQt5.QtCore import QSize, pyqtSlot from PyQt5.QtWidgets import QApplication, QDesktopWidget, QMainWindow, QPushButton, QLabel, QTextEdit, QComboBox, QFileDialog, QMessageBox # 窗口标题 window_title = "文本识别" API_KEY = "YOUR_API_KEY_HERE" SECRET_KEY = "YOUR_SECRET_KEY_HERE" TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token' OCR_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/' # OCR 语言接口参数封装 class Language(Enum): auto_detect = 0 CHN_ENG = 1 ENG = 2 JAP = 3 KOR = 4 FRE = 5 SPA = 6 POR = 7 GER = 8 ITA = 9 RUS = 10 DAN = 11 DUT = 12 MAL = 13 SWE = 14 IND = 15 POL = 16 ROM = 17 TUR = 18 GRE = 19 HUN = 20 # OCR 调用封装类 class OCR(object): def __init__(self, api_key, secret_key): super().__init__() self.API_KEY = api_key self.SECRET_KEY = secret_key self.OCR_TYPE = "通用文字" self.ACCESS_TOKEN = self.fetch_token() self.LANGUAGE = Language.auto_detect.name self.DETECT_DIRECTION = "false" self.PARAGRAPH = "false" self.PROBABILITY = "true" def set_language(self, language): self.LANGUAGE = language.name def set_detect_direction(self, detect_direction): self.DETECT_DIRECTION = detect_direction def set_paragraph(self, paragraph): self.PARAGRAPH = paragraph def set_probability(self, probability): self.PROBABILITY = probability # 获取 Token def fetch_token(self): response = requests.post(TOKEN_URL, data={ 'grant_type': 'client_credentials', 'client_id': self.API_KEY, 'client_secret': self.SECRET_KEY }) if response: return response.json()['access_token'] # 编码图片文件 def encode_image(self, image_path): with open(image_path, 'rb') as f: image_data = f.read() return base64.b64encode(image_data) # 编码 PDF 文件 def encode_pdf(self, pdf_path): with open(pdf_path, 'rb') as f: pdf_data = f.read() def accurate_basic(self, image_path): ocr_url = OCR_URL + "accurate_basic" request_url = ocr_url + '?access_token=' + self.ACCESS_TOKEN headers = {'Content-Type': 'application/x-www-form-urlencoded'} params = { 'image': self.encode_image(image_path), 'language_type': self.LANGUAGE, 'detect_direction': self.DETECT_DIRECTION, 'paragraph': self.PARAGRAPH, 'probability': self.PROBABILITY, } json_data = requests.post( request_url, headers=headers, data=params).json() result_text = "" for paragraphs in json_data["paragraphs_result"]: for index in paragraphs["words_result_idx"]: result_text += json_data["words_result"][index]["words"] result_text += " " result_text += "\n" return result_text # 数字识别 def numbers(self, image_path): ocr_url = OCR_URL + "numbers" request_url = ocr_url + "?access_token=" + self.ACCESS_TOKEN headers = {'content-type': 'application/x-www-form-urlencoded'} params = { 'image': self.encode_image(image_path), 'detect_direction': self.DETECT_DIRECTION, } json_data = requests.post( request_url, headers=headers, data=params).json() result_text = "" for result in json_data["words_result"]: numbers = result['words'] result_text += numbers result_text += "\n" return result_text # 手写识别 def handwriting(self, image_path): ocr_url = OCR_URL + "handwriting" request_url = ocr_url + "?access_token=" + self.ACCESS_TOKEN headers = {'content-type': 'application/x-www-form-urlencoded'} params = { 'image': self.encode_image(image_path), 'detect_direction': self.DETECT_DIRECTION, 'probability': self.PROBABILITY, } json_data = requests.post( request_url, headers=headers, data=params).json() result_text = "" for result in json_data["words_result"]: words = result['words'] result_text += words result_text += "\n" return result_text # 获得 OCR 结果 def get_ocr_result(self, image_path): if self.OCR_TYPE == "通用文字": return self.accurate_basic(image_path) elif self.OCR_TYPE == "数字识别": return self.numbers(image_path) elif self.OCR_TYPE == "手写文字": return self.handwriting(image_path) # 窗口类 class Window(QMainWindow): def __init__(self): super().__init__() self.initWindow() self.initUI() self.initOCR() self.center() # 初始化窗口大小 def initWindow(self): self.setWindowTitle(window_title) self.setFixedWidth(1280) self.setFixedHeight(720) # 初始化窗口界面 def initUI(self): self.initOCROptionSelection() self.initImageSelection() self.initTextResult() # 初始化 OCR 类 def initOCR(self): self.OCR = OCR(API_KEY, SECRET_KEY) self.OCR.set_detect_direction("false") self.OCR.set_language(Language.CHN_ENG) self.OCR.set_paragraph("true") self.OCR.set_probability("false") # OCR 识别类型控件 def initOCROptionSelection(self): self.ocrOptionLabel = QLabel("选择识别类型", self) self.ocrOptionLabel.setFixedSize(QSize(220, 40)) self.ocrOptionLabel.move(80, 80) self.ocrOptionSelection = QComboBox(self) self.ocrOptionSelection.move(320, 80) self.ocrOptionSelection.setFixedSize(QSize(240, 40)) self.ocrOptionSelection.addItem("通用文字") self.ocrOptionSelection.addItem("数字识别") self.ocrOptionSelection.addItem("手写文字") # 绑定修改事件 self.ocrOptionSelection.currentIndexChanged.connect( self.selectionChange) # 图片选择及演示控件 def initImageSelection(self): self.imageSelectionButton = QPushButton("选择需要识别的图片", self) self.imageSelectionButton.setFixedSize(QSize(220, 80)) self.imageSelectionButton.move(80, 180) # 绑定点击事件 self.imageSelectionButton.clicked.connect(self.imageSelectionClicked) self.cameraSelectionButton = QPushButton("使用摄像头拍照", self) self.cameraSelectionButton.setFixedSize(QSize(220, 80)) self.cameraSelectionButton.move(340, 180) # 绑定点击事件 self.cameraSelectionButton.clicked.connect(self.cameraSelectionClicked) self.imagePreviewImage = QLabel(self) self.imagePreviewImage.setText("请选择要识别的图片") self.imagePreviewImage.setStyleSheet( "QLabel { background-color : gray; color : black; }") self.imagePreviewImage.setAlignment(QtCore.Qt.AlignCenter) self.imagePreviewImage.setFixedSize(QSize(480, 320)) self.imagePreviewImage.setScaledContents(True) self.imagePreviewImage.move(80, 320) # 识别结果控件 def initTextResult(self): self.textResult = QTextEdit(self) self.textResult.setFixedSize(QSize(480, 440)) self.textResult.move(720, 80) self.textCopy = QPushButton("复制到剪切板", self) self.textCopy.setFixedSize(QSize(480, 80)) self.textCopy.move(720, 560) # 绑定点击事件 self.textCopy.clicked.connect(self.copyClicked) # 窗口居中 def center(self): qr = self.frameGeometry() cp = QDesktopWidget().availableGeometry().center() qr.moveCenter(cp) self.move(qr.topLeft()) # 识别类型选择 def selectionChange(self, i): self.OCR.OCR_TYPE = self.ocrOptionSelection.currentText() # 选择系统图片 @pyqtSlot() def imageSelectionClicked(self): selected_file = QFileDialog.getOpenFileName(self, "选择你要识别的图片", "~/") file_path = selected_file[0] self.imagePreviewImage.setPixmap(QPixmap(file_path)) self.callOCR(file_path) @pyqtSlot() def cameraSelectionClicked(self): cam = cv2.VideoCapture(0) _, img = cam.read() cam_img_path = "./camera_temp.png" cv2.imwrite(cam_img_path, img) self.imagePreviewImage.setPixmap(QPixmap(cam_img_path)) self.callOCR(cam_img_path) # 复制到剪切板 @pyqtSlot() def copyClicked(self): print("copy") QApplication.clipboard().setText(self.textResult.toPlainText()) msg = QMessageBox(self) msg.setText('已复制到剪切板') msg.exec_() # 调用 OCR 接口 def callOCR(self, image_path): ocr_result = self.OCR.get_ocr_result(image_path) self.textResult.setPlainText(ocr_result) def main(): app = QApplication(sys.argv) window = Window() window.show() sys.exit(app.exec_()) if __name__ == '__main__': main()