zouyu
2025-09-26 3fbbfcc8f509c352c58dc8a126220b49b72ed5a0
ocr图像处理功能开发
已添加85个文件
已修改1个文件
已删除1个文件
47628 ■■■■■ 文件已修改
.gitignore 39 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
LICENSE 127 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
README.md 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
pom.xml 426 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/OcrToolApplication.java 16 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/config/DirectionModelConfig.java 33 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/config/OcrDetModelConfig.java 26 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/config/OcrRecModelConfig.java 37 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/config/OcrRecOptions.java 31 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/config/PlateDetModelConfig.java 40 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/config/PlateRecModelConfig.java 32 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/config/TableStructureConfig.java 25 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/DirectionInfo.java 41 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/IdCardInfo.java 12 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/ImageInfo.java 50 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/OcrBox.java 72 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/OcrInfo.java 37 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/OcrItem.java 54 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/PlateInfo.java 45 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/PlateResult.java 35 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/RotatedBox.java 46 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/RotatedBoxCompX.java 47 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/entity/TableStructureResult.java 33 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/enums/AngleEnum.java 38 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/enums/CommonDetModelEnum.java 32 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/enums/CommonRecModelEnum.java 32 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/enums/DirectionModelEnum.java 32 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/enums/PlateDetModelEnum.java 28 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/enums/PlateRecModelEnum.java 26 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/enums/PlateType.java 45 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/enums/TableStructureModelEnum.java 30 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/exception/OcrException.java 30 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/factory/OcrModelFactory.java 223 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/factory/PlateModelFactory.java 164 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/factory/TableRecModelFactory.java 107 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/detect/OcrCommonDetModel.java 104 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/detect/OcrCommonDetModelImpl.java 244 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/detect/criteria/OcrCommonDetCriterialFactory.java 53 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/detect/translator/PPOCRDetTranslator.java 536 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/direction/OcrDirectionModel.java 112 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/direction/PPOCRMobileV2ClsModel.java 393 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/direction/criteria/DirectionCriteriaFactory.java 57 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/direction/translator/PpWordRotateTranslator.java 105 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/recognize/OcrCommonRecModel.java 129 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/recognize/OcrCommonRecModelImpl.java 502 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/recognize/criteria/OcrCommonRecCriterialFactory.java 51 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/common/recognize/translator/PPOCRRecTranslator.java 129 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/CRNNPlateRecModel.java 325 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/PlateDetModel.java 112 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/PlateRecModel.java 116 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/Yolov5PlateDetModel.java 238 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/criteria/PlateDetCriterialFactory.java 71 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/criteria/PlateRecCriterialFactory.java 42 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/translator/CRNNPlateRecTranslator.java 88 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/translator/Yolo5PlateDetectTranslator.java 194 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/translator/Yolov7PlateDetectTranslator.java 192 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/plate/translator/Yolov8PlateDetectTranslator.java 188 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/table/CommonTableStructureModel.java 163 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/table/TableRecognizer.java 485 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/table/TableStructureModel.java 65 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/table/criteria/StructureCriteriaFactory.java 56 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/model/table/translator/TableStructTranslator.java 191 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/opencv/OcrNDArrayUtils.java 228 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/utils/ConvertHtml2Excel.java 236 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/utils/CrossRangeCellMeta.java 42 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/smartjavaai/utils/OcrUtils.java 455 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/config/SwingAppConfig.java 31 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/constant/OcrSwingConstants.java 26 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/controller/OcrController.java 25 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/dto/OcrDTO.java 10 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/excel/ContractNumberExcelData.java 18 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/service/OcrService.java 195 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/swing/FileProcessorApp.java 100 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/swing/jpanel/ContractNumberProcessPanel.java 645 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/swing/jpanel/MultipleAreaProcessPanel.java 1166 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/swing/jpanel/PdfPreviewPanel.java 202 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/swing/utils/FileNameValidator.java 81 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/swing/utils/GenerateCustomizeComponent.java 122 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/xindao/ocr/swingui/swing/utils/ToFile.java 179 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/PP_OCRv5/PP-LCNet_x0_25_textline_ori_infer/PP-LCNet_x0_25_textline_ori_infer.onnx 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/PP_OCRv5/PP-OCRv5_mobile_rec_infer/PP-OCRv5_mobile_rec_infer.onnx 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/PP_OCRv5/PP-OCRv5_mobile_rec_infer/dict.txt 18383 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/PP_OCRv5/PP-OCRv5_server_det_infer/PP-OCRv5_server_det.onnx 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/PP_OCRv5/PP-OCRv5_server_rec_infer/PP-OCRv5_server_rec.onnx 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/PP_OCRv5/PP-OCRv5_server_rec_infer/dict.txt 18383 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/application.yml 2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/logback-spring.xml 63 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
.gitignore
@@ -1,3 +1,42 @@
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### IntelliJ IDEA ###
.idea/
*.iws
*.iml
*.ipr
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store
logs/
target/
*.class
# Mobile Tools for Java (J2ME)
LICENSE
对比新文件
@@ -0,0 +1,127 @@
                     木兰宽松许可证, 第2版
   木兰宽松许可证， 第2版
   2020年1月 http://license.coscl.org.cn/MulanPSL2
   您对“软件”的复制、使用、修改及分发受木兰宽松许可证,第2版(“本许可证”)的如下条款的约束:
   0. 定义
      “软件”是指由“贡献”构成的许可在“本许可证”下的程序和相关文档的集合。
      “贡献”是指由任一“贡献者”许可在“本许可证”下的受版权法保护的作品。
      “贡献者”是指将受版权法保护的作品许可在“本许可证”下的自然人或“法人实体”。
      “法人实体”是指提交贡献的机构及其“关联实体”。
      “关联实体”是指,对“本许可证”下的行为方而言,控制、受控制或与其共同受控制的机构,此处的控制是指有受控方或共同受控方至少50%直接或间接的投票权、资金或其他有价证券。
   1. 授予版权许可
      每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的版权许可,您可以复制、使用、修改、分发其“贡献”,不论修改与否。
   2. 授予专利许可
      每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的(根据本条规定撤销除外)专利许可,供您制造、委托制造、使用、许诺销售、销售、进口其“贡献”或以其他方式转移其“贡献”。前述专利许可仅限于“贡献者”现在或将来拥有或控制的其“贡献”本身或其“贡献”与许可“贡献”时的“软件”结合而将必然会侵犯的专利权利要求,不包括对“贡献”的修改或包含“贡献”的其他结合。如果您或您的“关联实体”直接或间接地,就“软件”或其中的“贡献”对任何人发起专利侵权诉讼(包括反诉或交叉诉讼)或其他专利维权行动,指控其侵犯专利权,则“本许可证”授予您对“软件”的专利许可自您提起诉讼或发起维权行动之日终止。
   3. 无商标许可
      “本许可证”不提供对“贡献者”的商品名称、商标、服务标志或产品名称的商标许可,但您为满足第4条规定的声明义务而必须使用除外。
   4. 分发限制
      您可以在任何媒介中将“软件”以源程序形式或可执行形式重新分发,不论修改与否,但您必须向接收者提供“本许可证”的副本,并保留“软件”中的版权、商标、专利及免责声明。
   5. 免责声明与责任限制
      “软件”及其中的“贡献”在提供时不带任何明示或默示的担保。在任何情况下,“贡献者”或版权所有者不对任何人因使用“软件”或其中的“贡献”而引发的任何直接或间接损失承担责任,不论因何种原因导致或者基于何种法律理论,即使其曾被建议有此种损失的可能性。
   6. 语言
      “本许可证”以中英文双语表述，中英文版本具有同等法律效力。如果中英文版本存在任何冲突不一致,以中文版为准。
   æ¡æ¬¾ç»“束
   å¦‚何将木兰宽松许可证,第2版,应用到您的软件
   å¦‚果您希望将木兰宽松许可证,第2版,应用到您的新软件,为了方便接收者查阅,建议您完成如下三步:
      1, è¯·æ‚¨è¡¥å……如下声明中的空白,包括软件名、软件的首次发表年份以及您作为版权人的名字;
      2, è¯·æ‚¨åœ¨è½¯ä»¶åŒ…的一级目录下创建以“LICENSE”为名的文件,将整个许可证文本放入该文件中;
      3, è¯·å°†å¦‚下声明文本放入每个源文件的头部注释中。
   Copyright (c) 2025 DengWenJie
   SmartJavaAI is licensed under Mulan PSL v2.
   You can use this software according to the terms and conditions of the Mulan PSL v2.
   You may obtain a copy of Mulan PSL v2 at:
            http://license.coscl.org.cn/MulanPSL2
   THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
   See the Mulan PSL v2 for more details.
                     Mulan Permissive Software License,Version 2
   Mulan Permissive Software License,Version 2 (Mulan PSL v2)
   January 2020 http://license.coscl.org.cn/MulanPSL2
   Your reproduction, use, modification and distribution of the Software shall be subject to Mulan PSL v2 (this License) with the following terms and conditions:
   0. Definition
      Software means the program and related documents which are licensed under this License and comprise all Contribution(s).
      Contribution means the copyrightable work licensed by a particular Contributor under this License.
      Contributor means the Individual or Legal Entity who licenses its copyrightable work under this License.
      Legal Entity means the entity making a Contribution and all its Affiliates.
      Affiliates means entities that control, are controlled by, or are under common control with the acting entity under this License, ‘control’ means direct or indirect ownership of at least fifty percent (50%) of the voting power, capital or other securities of controlled or commonly controlled entity.
   1. Grant of Copyright License
      Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable copyright license to reproduce, use, modify, or distribute its Contribution, with modification or not.
   2. Grant of Patent License
      Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable (except for revocation under this Section) patent license to make, have made, use, offer for sale, sell, import or otherwise transfer its Contribution, where such patent license is only limited to the patent claims owned or controlled by such Contributor now or in future which will be necessarily infringed by its Contribution alone, or by combination of the Contribution with the Software to which the Contribution was contributed. The patent license shall not apply to any modification of the Contribution, and any other combination which includes the Contribution. If you or your Affiliates directly or indirectly institute patent litigation (including a cross claim or counterclaim in a litigation) or other patent enforcement activities against any individual or entity by alleging that the Software or any Contribution in it infringes patents, then any patent license granted to you under this License for the Software shall terminate as of the date such litigation or activity is filed or taken.
   3. No Trademark License
      No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, except as required to fulfill notice requirements in Section 4.
   4. Distribution Restriction
      You may distribute the Software in any medium with or without modification, whether in source or executable forms, provided that you provide recipients with a copy of this License and retain copyright, patent, trademark and disclaimer statements in the Software.
   5. Disclaimer of Warranty and Limitation of Liability
      THE SOFTWARE AND CONTRIBUTION IN IT ARE PROVIDED WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED. IN NO EVENT SHALL ANY CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM YOUR USE OR INABILITY TO USE THE SOFTWARE OR THE CONTRIBUTION IN IT, NO MATTER HOW IT’S CAUSED OR BASED ON WHICH LEGAL THEORY, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
   6. Language
      THIS LICENSE IS WRITTEN IN BOTH CHINESE AND ENGLISH, AND THE CHINESE VERSION AND ENGLISH VERSION SHALL HAVE THE SAME LEGAL EFFECT. IN THE CASE OF DIVERGENCE BETWEEN THE CHINESE AND ENGLISH VERSIONS, THE CHINESE VERSION SHALL PREVAIL.
   END OF THE TERMS AND CONDITIONS
   How to Apply the Mulan Permissive Software License,Version 2 (Mulan PSL v2) to Your Software
      To apply the Mulan PSL v2 to your work, for easy identification by recipients, you are suggested to complete following three steps:
      i Fill in the blanks in following statement, including insert your software name, the year of the first publication of your software, and your name identified as the copyright owner;
      ii Create a file named “LICENSE” which contains the whole context of this License in the first directory of your software package;
      iii Attach the statement to the appropriate annotated syntax at the beginning of each source file.
   Copyright (c) 2025 DengWenJie
   SmartJavaAI is licensed under Mulan PSL v2.
   You can use this software according to the terms and conditions of the Mulan PSL v2.
   You may obtain a copy of Mulan PSL v2 at:
               http://license.coscl.org.cn/MulanPSL2
   THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
   See the Mulan PSL v2 for more details.
README.md
文件已删除
pom.xml
对比新文件
@@ -0,0 +1,426 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.xindao.ocr</groupId>
    <artifactId>ocr-tool</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <java.version>1.8</java.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <springboot.version>2.6.13</springboot.version>
        <djl.version>0.32.0</djl.version>
        <spring-boot.version>2.6.13</spring-boot.version>
        <easyexcel.version>3.3.4</easyexcel.version>
        <smartjavaai.version>1.0.24</smartjavaai.version>
        <javacv.version>1.5.10</javacv.version>
        <javacv.platform.macosx-arm64>macosx-arm64</javacv.platform.macosx-arm64>
        <javacv.platform.linux-x86_64>linux-x86_64</javacv.platform.linux-x86_64>
        <javacv.platform.linux-arm64>linux-arm64</javacv.platform.linux-arm64>
        <javacv.platform.windows-x86_64>windows-x86_64</javacv.platform.windows-x86_64>
        <djl.platform.windows-x86_64>win-x86_64</djl.platform.windows-x86_64>
        <djl.platform.linux-x86_64>linux-x86_64</djl.platform.linux-x86_64>
        <djl.platform.linux-aarch64>linux-aarch64</djl.platform.linux-aarch64>
        <djl.platform.osx-aarch64>osx-aarch64</djl.platform.osx-aarch64>
    </properties>
    <url>https://github.com/geekwenjie/SmartJavaAI</url>
    <licenses>
        <license>
            <name>MIT License</name>
            <url>https://opensource.org/licenses/MIT</url>
        </license>
    </licenses>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <version>${springboot.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>easyexcel</artifactId>
            <version>${easyexcel.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>commons-io</groupId>
                    <artifactId>commons-io</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>7.4.0</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.9.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.9</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.4</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.30</version>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.9.0</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.17.0</version>
        </dependency>
        <!-- Apache Commons Pool2 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-pool2</artifactId>
            <version>2.12.0</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-system</artifactId>
            <version>5.8.16</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-setting</artifactId>
            <version>5.8.16</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.28</version>
        </dependency>
        <!--OCR相关依赖-->
        <dependency>
            <groupId>cn.smartjavaai</groupId>
            <artifactId>common</artifactId>
            <version>${smartjavaai.version}</version>
        </dependency>
        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
            <version>1.6.1</version>
        </dependency>
        <dependency>
            <groupId>ai.djl</groupId>
            <artifactId>api</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <dependency>
            <groupId>ai.djl</groupId>
            <artifactId>model-zoo</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <dependency>
            <groupId>ai.djl.huggingface</groupId>
            <artifactId>tokenizers</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <!-- MXNet -->
        <dependency>
            <groupId>ai.djl.mxnet</groupId>
            <artifactId>mxnet-model-zoo</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <!-- Pytorch -->
        <dependency>
            <groupId>ai.djl.pytorch</groupId>
            <artifactId>pytorch-model-zoo</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <!-- TensorFlow -->
        <dependency>
            <groupId>ai.djl.tensorflow</groupId>
            <artifactId>tensorflow-model-zoo</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <dependency>
            <groupId>ai.djl.pytorch</groupId>
            <artifactId>pytorch-engine</artifactId>
            <version>${djl.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>ai.djl.tensorflow</groupId>
            <artifactId>tensorflow-engine</artifactId>
            <version>${djl.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>ai.djl.mxnet</groupId>
            <artifactId>mxnet-engine</artifactId>
            <version>${djl.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>ai.djl.onnxruntime</groupId>
            <artifactId>onnxruntime-engine</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <dependency>
            <groupId>ai.djl.opencv</groupId>
            <artifactId>opencv</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <dependency>
            <groupId>gov.nist.math</groupId>
            <artifactId>jama</artifactId>
            <version>1.0.3</version>
        </dependency>
        <dependency>
            <groupId>org.bytedeco</groupId>
            <artifactId>javacv</artifactId>
            <version>1.5.10</version>
        </dependency>
        <dependency>
            <groupId>ai.djl.pytorch</groupId>
            <artifactId>pytorch-jni</artifactId>
            <version>2.5.1-0.32.0</version>
            <scope>runtime</scope>
        </dependency>
        <!-- windows平台 (保留对应平台的配置,可以减小包大小)-->
        <dependency>
            <groupId>org.bytedeco</groupId>
            <artifactId>javacpp</artifactId>
            <version>${javacv.version}</version>
            <classifier>${javacv.platform.windows-x86_64}</classifier>
        </dependency>
        <dependency>
            <groupId>org.bytedeco</groupId>
            <artifactId>openblas</artifactId>
            <version>0.3.26-1.5.10</version>
            <classifier>${javacv.platform.windows-x86_64}</classifier>
        </dependency>
        <dependency>
            <groupId>org.bytedeco</groupId>
            <artifactId>opencv</artifactId>
            <version>4.9.0-1.5.10</version>
            <classifier>${javacv.platform.windows-x86_64}</classifier>
        </dependency>
        <dependency>
            <groupId>ai.djl.pytorch</groupId>
            <artifactId>pytorch-native-cpu</artifactId>
            <classifier>${djl.platform.windows-x86_64}</classifier>
            <version>2.5.1</version>
            <scope>runtime</scope>
        </dependency>
        <!-- linux x86 平台 (保留对应平台的配置,可以减小包大小)-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>javacpp</artifactId>-->
        <!--            <version>${javacv.version}</version>-->
        <!--            <classifier>${javacv.platform.linux-x86_64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>ffmpeg</artifactId>-->
        <!--            <version>6.1.1-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.linux-x86_64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>openblas</artifactId>-->
        <!--            <version>0.3.26-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.linux-x86_64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>opencv</artifactId>-->
        <!--            <version>4.9.0-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.linux-x86_64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>ai.djl.pytorch</groupId>-->
        <!--            <artifactId>pytorch-native-cpu</artifactId>-->
        <!--            <classifier>${djl.platform.linux-x86_64}</classifier>-->
        <!--            <version>2.5.1</version>-->
        <!--            <scope>runtime</scope>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>ai.djl.pytorch</groupId>-->
        <!--            <artifactId>pytorch-native-cpu-precxx11</artifactId>-->
        <!--            <classifier>${djl.platform.linux-x86_64}</classifier>-->
        <!--            <version>2.5.1</version>-->
        <!--            <scope>runtime</scope>-->
        <!--        </dependency>-->
        <!-- macOS M系列 平台  (保留对应平台的配置,可以减小包大小)-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>javacpp</artifactId>-->
        <!--            <version>${javacv.version}</version>-->
        <!--            <classifier>${javacv.platform.macosx-arm64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>ffmpeg</artifactId>-->
        <!--            <version>6.1.1-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.macosx-arm64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>openblas</artifactId>-->
        <!--            <version>0.3.26-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.macosx-arm64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>opencv</artifactId>-->
        <!--            <version>4.9.0-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.macosx-arm64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>ai.djl.pytorch</groupId>-->
        <!--            <artifactId>pytorch-native-cpu</artifactId>-->
        <!--            <classifier>${djl.platform.osx-aarch64}</classifier>-->
        <!--            <version>2.5.1</version>-->
        <!--            <scope>runtime</scope>-->
        <!--        </dependency>-->
        <!-- linux aarch64 平台 (保留对应平台的配置,可以减小包大小)-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>javacpp</artifactId>-->
        <!--            <version>${javacv.version}</version>-->
        <!--            <classifier>${javacv.platform.linux-arm64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>ffmpeg</artifactId>-->
        <!--            <version>6.1.1-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.linux-arm64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>openblas</artifactId>-->
        <!--            <version>0.3.26-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.linux-arm64}</classifier>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.bytedeco</groupId>-->
        <!--            <artifactId>opencv</artifactId>-->
        <!--            <version>4.9.0-1.5.10</version>-->
        <!--            <classifier>${javacv.platform.linux-arm64}</classifier>-->
        <!--        </dependency>-->
    </dependencies>
    <build>
        <resources>
            <resource>
                <directory>src/main/resources</directory>
                <filtering>true</filtering>
                <includes>
                    <include>logback-spring.xml</include>
                </includes>
            </resource>
            <resource>
                <directory>src/main/resources</directory>
                <filtering>false</filtering>
                <includes>
                    <include>PP_OCRv5/**</include>
                </includes>
            </resource>
        </resources>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>${spring-boot.version}</version>
                <configuration>
                    <mainClass>com.xindao.ocr.OcrToolApplication</mainClass>
                </configuration>
                <executions>
                    <execution>
                        <id>repackage</id>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <!-- 必须添加:SCM信息 -->
    <scm>
        <connection>scm:git:git://github.com/geekwenjie/SmartJavaAI.git</connection>
        <developerConnection>scm:git:ssh://github.com/geekwenjie/SmartJavaAI.git</developerConnection>
        <url>http://github.com/geekwenjie/SmartJavaAI/tree/master</url>
    </scm>
    <developers>
        <developer>
            <name>dengwenjie</name>
            <email>775747758@qq.com</email>
            <roles>
                <role>Project Manager</role>
                <role>Architect</role>
            </roles>
        </developer>
    </developers>
</project>
src/main/java/com/xindao/ocr/OcrToolApplication.java
对比新文件
@@ -0,0 +1,16 @@
package com.xindao.ocr;
import org.springframework.boot.WebApplicationType;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.builder.SpringApplicationBuilder;
@SpringBootApplication
public class OcrToolApplication {
    public static void main(String[] args) {
        SpringApplicationBuilder builder = new SpringApplicationBuilder(OcrToolApplication.class);
        builder.headless(false).web(WebApplicationType.NONE).run(args);
        System.out.println("OCR Tool Application is running...");
    }
}
src/main/java/com/xindao/ocr/smartjavaai/config/DirectionModelConfig.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,33 @@
package com.xindao.ocr.smartjavaai.config;
import cn.smartjavaai.common.config.ModelConfig;
import com.xindao.ocr.smartjavaai.enums.DirectionModelEnum;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import lombok.Data;
/**
 * Configuration of the text direction classification model.
 * @author dwj
 * @date 2025/4/22
 */
@Data
public class DirectionModelConfig extends ModelConfig {
    /**
     * Model variant to use.
     */
    private DirectionModelEnum modelEnum;
    /**
     * Path of the model.
     */
    private String modelPath;
    /**
     * Text detection model.
     */
    private OcrCommonDetModel textDetModel;
}
src/main/java/com/xindao/ocr/smartjavaai/config/OcrDetModelConfig.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,26 @@
package com.xindao.ocr.smartjavaai.config;
import cn.smartjavaai.common.config.ModelConfig;
import com.xindao.ocr.smartjavaai.enums.CommonDetModelEnum;
import lombok.Data;
/**
 * Configuration of the OCR text detection model.
 * @author dwj
 * @date 2025/4/22
 */
@Data
public class OcrDetModelConfig extends ModelConfig {
    /**
     * Model variant to use.
     */
    private CommonDetModelEnum modelEnum;
    /**
     * Path of the detection model.
     */
    private String detModelPath;
}
src/main/java/com/xindao/ocr/smartjavaai/config/OcrRecModelConfig.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,37 @@
package com.xindao.ocr.smartjavaai.config;
import cn.smartjavaai.common.config.ModelConfig;
import com.xindao.ocr.smartjavaai.enums.CommonRecModelEnum;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import com.xindao.ocr.smartjavaai.model.common.direction.OcrDirectionModel;
import lombok.Data;
/**
 * Configuration of the OCR text recognition model.
 * @author dwj
 * @date 2025/4/22
 */
@Data
public class OcrRecModelConfig extends ModelConfig {
    /**
     * Recognition model variant to use.
     */
    private CommonRecModelEnum recModelEnum;
    /**
     * Path of the recognition model.
     */
    private String recModelPath;
    /**
     * Text detection model.
     */
    private OcrCommonDetModel textDetModel;
    /**
     * Text direction model.
     */
    private OcrDirectionModel directionModel;
}
src/main/java/com/xindao/ocr/smartjavaai/config/OcrRecOptions.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,31 @@
package com.xindao.ocr.smartjavaai.config;
import lombok.Data;
/**
 * OCR recognition options.
 *
 * @author dwj
 */
@Data
public class OcrRecOptions {
    /**
     * Whether to apply text direction correction. Defaults to {@code false}.
     */
    private boolean enableDirectionCorrect = false;
    /**
     * Whether to split the results into lines. Defaults to {@code true}.
     */
    private boolean enableLineSplit = true;
    /**
     * Creates options with both switches set explicitly.
     */
    public OcrRecOptions(boolean enableDirectionCorrect, boolean enableLineSplit) {
        this.enableDirectionCorrect = enableDirectionCorrect;
        this.enableLineSplit = enableLineSplit;
    }
    /**
     * Creates options with the field defaults.
     */
    public OcrRecOptions() {
    }
}
src/main/java/com/xindao/ocr/smartjavaai/config/PlateDetModelConfig.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,40 @@
package com.xindao.ocr.smartjavaai.config;
import cn.smartjavaai.common.config.ModelConfig;
import com.xindao.ocr.smartjavaai.enums.PlateDetModelEnum;
import lombok.Data;
/**
 * Configuration of the license plate detection model.
 * @author dwj
 */
@Data
public class PlateDetModelConfig extends ModelConfig {
    /**
     * Model variant to use.
     */
    private PlateDetModelEnum modelEnum;
    /**
     * Path of the detection model.
     */
    private String modelPath;
    /**
     * Confidence threshold.
     */
    private float confidenceThreshold;
    /**
     * IoU threshold.
     */
    private float iouThreshold;
    /**
     * Number of detection results.
     */
    private int topK;
}
src/main/java/com/xindao/ocr/smartjavaai/config/PlateRecModelConfig.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,32 @@
package com.xindao.ocr.smartjavaai.config;
import cn.smartjavaai.common.config.ModelConfig;
import com.xindao.ocr.smartjavaai.enums.PlateRecModelEnum;
import com.xindao.ocr.smartjavaai.model.plate.PlateDetModel;
import lombok.Data;
/**
 * Configuration of the license plate recognition model.
 * @author dwj
 */
@Data
public class PlateRecModelConfig extends ModelConfig {
    /**
     * Model variant to use.
     */
    private PlateRecModelEnum modelEnum;
    /**
     * Path of the model.
     */
    private String modelPath;
    /**
     * License plate detection model.
     */
    private PlateDetModel plateDetModel;
}
src/main/java/com/xindao/ocr/smartjavaai/config/TableStructureConfig.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,25 @@
package com.xindao.ocr.smartjavaai.config;
import cn.smartjavaai.common.config.ModelConfig;
import com.xindao.ocr.smartjavaai.enums.TableStructureModelEnum;
import lombok.Data;
/**
 * Configuration of the OCR table structure recognition model.
 * @author dwj
 */
@Data
public class TableStructureConfig extends ModelConfig {
    /**
     * Model variant to use.
     */
    private TableStructureModelEnum modelEnum;
    /**
     * Path of the model.
     */
    private String modelPath;
}
src/main/java/com/xindao/ocr/smartjavaai/entity/DirectionInfo.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,41 @@
package com.xindao.ocr.smartjavaai.entity;
/**
 * Result of a direction detection: a direction label plus its confidence.
 * @author Calvin
 * @mail 179209347@qq.com
 * @website www.aias.top
 */
public class DirectionInfo {
    /**
     * Direction: 0, 90, 180 or 270.
     */
    private String name;
    /**
     * Confidence.
     */
    private Double prob;

    /**
     * Creates a result holding the given direction label and confidence.
     */
    public DirectionInfo(String name, Double prob) {
        this.name = name;
        this.prob = prob;
    }

    // ---- read access ----

    public String getName() {
        return this.name;
    }

    public Double getProb() {
        return this.prob;
    }

    // ---- write access ----

    public void setName(String name) {
        this.name = name;
    }

    public void setProb(Double prob) {
        this.prob = prob;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/entity/IdCardInfo.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,12 @@
package com.xindao.ocr.smartjavaai.entity;
/**
 * ID card information. Currently an empty placeholder with no fields.
 * @author dwj
 * @date 2025/5/22
 */
public class IdCardInfo {
}
src/main/java/com/xindao/ocr/smartjavaai/entity/ImageInfo.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,50 @@
package com.xindao.ocr.smartjavaai.entity;
import ai.djl.modality.cv.Image;
import ai.djl.ndarray.NDArray;
/**
 * å›¾åƒä¿¡æ¯
 */
public class ImageInfo {
    private String name;
    private Double prob;
    private Image image;
    private NDArray box;
    public ImageInfo(Image image, NDArray box) {
        this.image = image;
        this.box = box;
    }
    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public Double getProb() {
        return prob;
    }
    public void setProb(Double prob) {
        this.prob = prob;
    }
    public Image getImage() {
        return image;
    }
    public void setImage(Image image) {
        this.image = image;
    }
    public NDArray getBox() {
        return box;
    }
    public void setBox(NDArray box) {
        this.box = box;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/entity/OcrBox.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,72 @@
package com.xindao.ocr.smartjavaai.entity;
import cn.smartjavaai.common.entity.DetectionRectangle;
import cn.smartjavaai.common.entity.Point;
import lombok.Data;
/**
 * OCR æ£€æµ‹æ¡†
 * @author dwj
 * @date 2025/5/20
 */
@Data
public class OcrBox {
    /**
     * å·¦ä¸Šè§’
     */
    private Point topLeft;
    /**
     * å³ä¸Šè§’
     */
    private Point topRight;
    /**
     * å³ä¸‹è§’
     */
    private Point bottomRight;
    /**
     * å·¦ä¸‹è§’
     */
    private Point bottomLeft;
    public OcrBox(Point topLeft, Point topRight, Point bottomRight, Point bottomLeft) {
        this.topLeft = topLeft;
        this.topRight = topRight;
        this.bottomRight = bottomRight;
        this.bottomLeft = bottomLeft;
    }
    public OcrBox() {
    }
    public float[] toFloatArray() {
        return new float[]{
                (float)topLeft.getX(), (float)topLeft.getY(),
                (float)topRight.getX(), (float)topRight.getY(),
                (float)bottomRight.getX(), (float)bottomRight.getY(),
                (float)bottomLeft.getX(), (float)bottomLeft.getY()
        };
    }
    /**
     * è½¬æ¢ä¸º DetectionRectangle,使用最小外包矩形
     */
    public DetectionRectangle toDetectionRectangle() {
        float[] pts = toFloatArray();
        float minX = Math.min(Math.min(pts[0], pts[2]), Math.min(pts[4], pts[6]));
        float minY = Math.min(Math.min(pts[1], pts[3]), Math.min(pts[5], pts[7]));
        float maxX = Math.max(Math.max(pts[0], pts[2]), Math.max(pts[4], pts[6]));
        float maxY = Math.max(Math.max(pts[1], pts[3]), Math.max(pts[5], pts[7]));
        DetectionRectangle rect = new DetectionRectangle();
        rect.setX((int) minX);
        rect.setY((int) minY);
        rect.setWidth((int) (maxX - minX));
        rect.setHeight((int) (maxY - minY));
        return rect;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/entity/OcrInfo.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,37 @@
package com.xindao.ocr.smartjavaai.entity;
import lombok.Data;
import java.util.List;
import java.util.stream.Collectors;
/**
 * OCR信息
 * @author dwj
 * @date 2025/5/20
 */
@Data
public class OcrInfo {
    private List<List<OcrItem>> lineList;
    private List<OcrItem> ocrItemList;
    private String fullText;
    private String base64Img;
    public OcrInfo(List<List<OcrItem>> lineList, String fullText) {
        this.lineList = lineList;
        this.fullText = fullText;
    }
    public OcrInfo() {
    }
    public List<OcrItem> flattenLines() {
        return lineList.stream()
                .flatMap(List::stream)
                .collect(Collectors.toList());
    }
}
src/main/java/com/xindao/ocr/smartjavaai/entity/OcrItem.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,54 @@
package com.xindao.ocr.smartjavaai.entity;
import com.xindao.ocr.smartjavaai.enums.AngleEnum;
import lombok.Data;
/**
 * @author dwj
 * @date 2025/5/20
 */
@Data
public class OcrItem {
    /**
     * è¯†åˆ«æ¡†
     */
    private OcrBox ocrBox;
    /**
     * æ–‡æœ¬
     */
    private String text;
    /**
     * æ–¹å‘
     */
    private AngleEnum angle;
    /**
     * æ£€æµ‹å¾—分
     */
    private float score;
    public OcrItem(OcrBox ocrBox, String text) {
        this.ocrBox = ocrBox;
        this.text = text;
    }
    public OcrItem() {
    }
    public OcrItem(OcrBox ocrBox, String text, AngleEnum angle) {
        this.ocrBox = ocrBox;
        this.text = text;
        this.angle = angle;
    }
    public OcrItem(OcrBox ocrBox, AngleEnum angle, float score) {
        this.ocrBox = ocrBox;
        this.angle = angle;
        this.score = score;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/entity/PlateInfo.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,45 @@
package com.xindao.ocr.smartjavaai.entity;
import cn.smartjavaai.common.entity.DetectionRectangle;
import com.xindao.ocr.smartjavaai.enums.PlateType;
import lombok.Data;
/**
 * License plate recognition information.
 * @author dwj
 */
@Data
public class PlateInfo {
    /**
     * Plate type.
     */
    private PlateType plateType;
    /**
     * Plate number.
     */
    private String plateNumber;
    /**
     * Plate color.
     */
    private String plateColor;
    /**
     * Detected position.
     */
    private DetectionRectangle detectionRectangle;
    /**
     * Coordinates of the four plate corners.
     */
    private OcrBox box;
    /**
     * Detection score.
     */
    private float score;
}
src/main/java/com/xindao/ocr/smartjavaai/entity/PlateResult.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,35 @@
package com.xindao.ocr.smartjavaai.entity;
import lombok.Data;
/**
 * @author dwj
 */
@Data
public class PlateResult {
    /**
     * è½¦ç‰Œå·ç 
     */
    private String plateNo;
    /**
     * è½¦ç‰Œé¢œè‰²
     */
    private String plateColor;
    public PlateResult(String plateNo, String plateColor) {
        this.plateNo = plateNo;
        this.plateColor = plateColor;
    }
    @Override
    public String toString() {
        return "PlateResult{" +
                "plateNo='" + plateNo + '\'' +
                ", plateColor='" + plateColor + '\'' +
                '}';
    }
}
src/main/java/com/xindao/ocr/smartjavaai/entity/RotatedBox.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,46 @@
package com.xindao.ocr.smartjavaai.entity;
import ai.djl.ndarray.NDArray;
/**
 * æ—‹è½¬æ£€æµ‹æ¡†
 */
public class RotatedBox implements Comparable<RotatedBox> {
    private NDArray box;
    private String text;
    public RotatedBox(NDArray box, String text) {
        this.box = box;
        this.text = text;
    }
    /**
     * å°†å·¦ä¸Šè§’ Y åæ ‡å‡åºæŽ’序
     *
     * @param o
     * @return
     */
    @Override
    public int compareTo(RotatedBox o) {
        NDArray lowBox = this.getBox();
        NDArray highBox = o.getBox();
        float lowY = lowBox.toFloatArray()[1];
        float highY = highBox.toFloatArray()[1];
        return (lowY < highY) ? -1 : 1;
    }
    public NDArray getBox() {
        return box;
    }
    public void setBox(NDArray box) {
        this.box = box;
    }
    public String getText() {
        return text;
    }
    public void setText(String text) {
        this.text = text;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/entity/RotatedBoxCompX.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,47 @@
package com.xindao.ocr.smartjavaai.entity;
import ai.djl.ndarray.NDArray;
/**
 * æ—‹è½¬æ£€æµ‹æ¡† - æ”¯æŒå·¦ä¸Šè§’ X åæ ‡å‡åºæŽ’序
 */
public class RotatedBoxCompX implements Comparable<RotatedBoxCompX> {
    private NDArray box;
    private String text;
    public RotatedBoxCompX(NDArray box, String text) {
        this.box = box;
        this.text = text;
    }
    /**
     * å°†å·¦ä¸Šè§’ X åæ ‡å‡åºæŽ’序
     *
     * @param o
     * @return
     */
    @Override
    public int compareTo(RotatedBoxCompX o) {
        NDArray leftBox = this.getBox();
        NDArray rightBox = o.getBox();
        float leftX = leftBox.toFloatArray()[0];
        float rightX = rightBox.toFloatArray()[0];
        return (leftX < rightX) ? -1 : 1;
    }
    public NDArray getBox() {
        return box;
    }
    public void setBox(NDArray box) {
        this.box = box;
    }
    public String getText() {
        return text;
    }
    public void setText(String text) {
        this.text = text;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/entity/TableStructureResult.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,33 @@
package com.xindao.ocr.smartjavaai.entity;
import lombok.Data;
import java.util.List;
/**
 * @author dwj
 */
@Data
public class TableStructureResult {
    private List<OcrItem> ocrItemList;
    private List<String> tableTagList;
    private String html;
    public TableStructureResult(List<OcrItem> ocrItemList, List<String> tableTagList) {
        this.ocrItemList = ocrItemList;
        this.tableTagList = tableTagList;
    }
    public TableStructureResult() {
    }
    public TableStructureResult(List<OcrItem> ocrItemList, List<String> tableTagList, String html) {
        this.ocrItemList = ocrItemList;
        this.tableTagList = tableTagList;
        this.html = html;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/enums/AngleEnum.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,38 @@
package com.xindao.ocr.smartjavaai.enums;
/**
 * Text direction, expressed as an angle in degrees.
 * @author dwj
 * @date 2025/5/23
 */
public enum AngleEnum {
    ANGLE_0("0"),
    ANGLE_90("90"),
    ANGLE_180("180"),
    ANGLE_270("270");

    /** Angle as a decimal string, without unit. */
    private final String value;

    AngleEnum(String value) {
        this.value = value;
    }

    public String getValue() {
        return this.value;
    }

    /**
     * Looks up the constant whose value equals the given string.
     *
     * @param value angle string such as "90"
     * @return the matching constant
     * @throws IllegalArgumentException if no constant has that value
     */
    public static AngleEnum fromValue(String value) {
        AngleEnum[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            AngleEnum candidate = candidates[i];
            if (!candidate.value.equals(value)) {
                continue;
            }
            return candidate;
        }
        throw new IllegalArgumentException("Invalid angle value: " + value);
    }

    /**
     * Returns the value followed by a degree sign.
     */
    @Override
    public String toString() {
        return value + "°";
    }
}
src/main/java/com/xindao/ocr/smartjavaai/enums/CommonDetModelEnum.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,32 @@
package com.xindao.ocr.smartjavaai.enums;
/**
 * OCR检测模型枚举
 * @author dwj
 */
public enum CommonDetModelEnum {
    PP_OCR_V5_SERVER_DET_MODEL,
    PP_OCR_V5_MOBILE_DET_MODEL,
    PP_OCR_V4_SERVER_DET_MODEL,
    PP_OCR_V4_MOBILE_DET_MODEL;
    /**
     * æ ¹æ®åç§°èŽ·å–æžšä¸¾ (忽略大小写和下划线变体)
     */
    public static CommonDetModelEnum fromName(String name) {
        String formatted = name.trim().toUpperCase().replaceAll("[-_]", "");
        for (CommonDetModelEnum model : values()) {
            if (model.name().replaceAll("_", "").equals(formatted)) {
                return model;
            }
        }
        throw new IllegalArgumentException("未知模型名称: " + name);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/enums/CommonRecModelEnum.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,32 @@
package com.xindao.ocr.smartjavaai.enums;
/**
 * OCR识别模型枚举
 * @author dwj
 */
public enum CommonRecModelEnum {
    PP_OCR_V5_SERVER_REC_MODEL,
    PP_OCR_V5_MOBILE_REC_MODEL,
    PP_OCR_V4_SERVER_REC_MODEL,
    PP_OCR_V4_MOBILE_REC_MODEL;
    /**
     * æ ¹æ®åç§°èŽ·å–æžšä¸¾ (忽略大小写和下划线变体)
     */
    public static CommonRecModelEnum fromName(String name) {
        String formatted = name.trim().toUpperCase().replaceAll("[-_]", "");
        for (CommonRecModelEnum model : values()) {
            if (model.name().replaceAll("_", "").equals(formatted)) {
                return model;
            }
        }
        throw new IllegalArgumentException("未知模型名称: " + name);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/enums/DirectionModelEnum.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,32 @@
package com.xindao.ocr.smartjavaai.enums;
/**
 * OCR文本方向分类模型枚举
 * @author dwj
 * @date 2025/4/4
 */
public enum DirectionModelEnum {
    CH_PPOCR_MOBILE_V2_CLS,
    PP_LCNET_X0_25,
    PP_LCNET_X1_0;
    /**
     * æ ¹æ®åç§°èŽ·å–æžšä¸¾ (忽略大小写和下划线变体)
     */
    public static DirectionModelEnum fromName(String name) {
        String formatted = name.trim().toUpperCase().replaceAll("[-_]", "");
        for (DirectionModelEnum model : values()) {
            if (model.name().replaceAll("_", "").equals(formatted)) {
                return model;
            }
        }
        throw new IllegalArgumentException("未知模型名称: " + name);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/enums/PlateDetModelEnum.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,28 @@
package com.xindao.ocr.smartjavaai.enums;
/**
 * License plate detection models.
 * @author dwj
 */
public enum PlateDetModelEnum {
    YOLOV5,
    YOLOV7;

    /**
     * Resolves a constant by name, ignoring case, surrounding whitespace and
     * any '-' or '_' separators.
     * <p>
     * Upper-casing uses {@code Locale.ROOT} so the result does not depend on
     * the JVM's default locale.
     *
     * @param name model name, e.g. "yolo-v5"
     * @return the matching constant
     * @throws IllegalArgumentException if the name is null or matches no constant
     */
    public static PlateDetModelEnum fromName(String name) {
        if (name != null) {
            String formatted = name.trim().toUpperCase(java.util.Locale.ROOT).replaceAll("[-_]", "");
            for (PlateDetModelEnum model : values()) {
                if (model.name().replace("_", "").equals(formatted)) {
                    return model;
                }
            }
        }
        throw new IllegalArgumentException("未知模型名称: " + name);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/enums/PlateRecModelEnum.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,26 @@
package com.xindao.ocr.smartjavaai.enums;
/**
 * License plate recognition models.
 * @author dwj
 */
public enum PlateRecModelEnum {
    PLATE_REC_CRNN;

    /**
     * Resolves a constant by name, ignoring case, surrounding whitespace and
     * any '-' or '_' separators.
     * <p>
     * Upper-casing uses {@code Locale.ROOT} so the result does not depend on
     * the JVM's default locale.
     *
     * @param name model name, e.g. "plate-rec-crnn"
     * @return the matching constant
     * @throws IllegalArgumentException if the name is null or matches no constant
     */
    public static PlateRecModelEnum fromName(String name) {
        if (name != null) {
            String formatted = name.trim().toUpperCase(java.util.Locale.ROOT).replaceAll("[-_]", "");
            for (PlateRecModelEnum model : values()) {
                if (model.name().replace("_", "").equals(formatted)) {
                    return model;
                }
            }
        }
        throw new IllegalArgumentException("未知模型名称: " + name);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/enums/PlateType.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,45 @@
package com.xindao.ocr.smartjavaai.enums;
/**
 * License plate layout type, identified by a class name and a description.
 * @author dwj
 */
public enum PlateType {
    SINGLE("single", "单层"),
    DOUBLE("double", "双层"),
    UNKNOWN("unknown", "未知");

    private final String className;
    private final String description;

    PlateType(String className, String description) {
        this.className = className;
        this.description = description;
    }

    public String getClassName() {
        return this.className;
    }

    public String getDescription() {
        return this.description;
    }

    /**
     * Finds the PlateType whose class name equals the given string exactly.
     *
     * @param className class name to look up
     * @return the matching PlateType, or {@code null} when none matches
     */
    public static PlateType fromClassName(String className) {
        PlateType found = null;
        PlateType[] all = values();
        for (int i = 0; i < all.length && found == null; i++) {
            if (all[i].className.equals(className)) {
                found = all[i];
            }
        }
        return found;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/enums/TableStructureModelEnum.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,30 @@
package com.xindao.ocr.smartjavaai.enums;
/**
 * OCR table structure models.
 * @author dwj
 */
public enum TableStructureModelEnum {
    SLANET,
    //SLANEXT_WIRED,
    SLANET_PLUS;

    /**
     * Resolves a constant by name, ignoring case, surrounding whitespace and
     * any '-' or '_' separators.
     * <p>
     * Upper-casing uses {@code Locale.ROOT} so the result does not depend on
     * the JVM's default locale.
     *
     * @param name model name, e.g. "slanet-plus"
     * @return the matching constant
     * @throws IllegalArgumentException if the name is null or matches no constant
     */
    public static TableStructureModelEnum fromName(String name) {
        if (name != null) {
            String formatted = name.trim().toUpperCase(java.util.Locale.ROOT).replaceAll("[-_]", "");
            for (TableStructureModelEnum model : values()) {
                if (model.name().replace("_", "").equals(formatted)) {
                    return model;
                }
            }
        }
        throw new IllegalArgumentException("未知模型名称: " + name);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/exception/OcrException.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,30 @@
package com.xindao.ocr.smartjavaai.exception;
/**
 * Unchecked exception for OCR failures. Mirrors the constructors of
 * {@link RuntimeException}.
 * @author dwj
 * @date 2025/4/4
 */
public class OcrException extends RuntimeException {

    /** Creates an exception without message or cause. */
    public OcrException() {
    }

    /** Creates an exception with a message. */
    public OcrException(String message) {
        super(message);
    }

    /** Creates an exception wrapping a cause. */
    public OcrException(Throwable cause) {
        super(cause);
    }

    /** Creates an exception with a message and a cause. */
    public OcrException(String message, Throwable cause) {
        super(message, cause);
    }

    /** Creates an exception with full control over suppression and stack trace. */
    public OcrException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/factory/OcrModelFactory.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,223 @@
package com.xindao.ocr.smartjavaai.factory;
import cn.smartjavaai.common.config.Config;
import com.xindao.ocr.smartjavaai.config.DirectionModelConfig;
import com.xindao.ocr.smartjavaai.config.OcrDetModelConfig;
import com.xindao.ocr.smartjavaai.config.OcrRecModelConfig;
import com.xindao.ocr.smartjavaai.enums.CommonDetModelEnum;
import com.xindao.ocr.smartjavaai.enums.CommonRecModelEnum;
import com.xindao.ocr.smartjavaai.enums.DirectionModelEnum;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModelImpl;
import com.xindao.ocr.smartjavaai.model.common.direction.OcrDirectionModel;
import com.xindao.ocr.smartjavaai.model.common.direction.PPOCRMobileV2ClsModel;
import com.xindao.ocr.smartjavaai.model.common.recognize.OcrCommonRecModel;
import com.xindao.ocr.smartjavaai.model.common.recognize.OcrCommonRecModelImpl;
import lombok.extern.slf4j.Slf4j;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
/**
 * OCR模型工厂
 * @author dwj
 */
@Slf4j
public class OcrModelFactory {
    // ä½¿ç”¨ volatile å’ŒåŒé‡æ£€æŸ¥é”å®šæ¥ç¡®ä¿çº¿ç¨‹å®‰å…¨çš„单例模式
    private static volatile OcrModelFactory instance;
    private static final ConcurrentHashMap<CommonDetModelEnum, OcrCommonDetModel> commonDetModelMap = new ConcurrentHashMap<>();
    private static final ConcurrentHashMap<CommonRecModelEnum, OcrCommonRecModel> commonRecModelMap = new ConcurrentHashMap<>();
    private static final ConcurrentHashMap<DirectionModelEnum, OcrDirectionModel> directionModelMap = new ConcurrentHashMap<>();
    /**
     * æ£€æµ‹æ¨¡åž‹æ³¨å†Œè¡¨
     */
    private static final Map<CommonDetModelEnum, Class<? extends OcrCommonDetModel>> commonDetRegistry =
            new ConcurrentHashMap<>();
    /**
     * è¯†åˆ«æ¨¡åž‹æ³¨å†Œè¡¨
     */
    private static final Map<CommonRecModelEnum, Class<? extends OcrCommonRecModel>> commonRecRegistry =
            new ConcurrentHashMap<>();
    /**
     * æ–¹å‘分类模型注册表
     */
    private static final Map<DirectionModelEnum, Class<? extends OcrDirectionModel>> directionRegistry =
            new ConcurrentHashMap<>();
    public static OcrModelFactory getInstance() {
        if (instance == null) {
            synchronized (OcrModelFactory.class) {
                if (instance == null) {
                    instance = new OcrModelFactory();
                }
            }
        }
        return instance;
    }
    /**
     * æ³¨å†Œé€šç”¨æ£€æµ‹æ¨¡åž‹
     * @param detModelEnum
     * @param clazz
     */
    private static void registerCommonDetModel(CommonDetModelEnum detModelEnum, Class<? extends OcrCommonDetModel> clazz) {
        commonDetRegistry.put(detModelEnum, clazz);
    }
    /**
     * æ³¨å†Œé€šç”¨è¯†åˆ«æ¨¡åž‹
     * @param recModelEnum
     * @param clazz
     */
    private static void registerCommonRecModel(CommonRecModelEnum recModelEnum, Class<? extends OcrCommonRecModel> clazz) {
        commonRecRegistry.put(recModelEnum, clazz);
    }
    /**
     * æ³¨å†Œé€šç”¨æ–¹å‘分类模型
     * @param directionModelEnum
     * @param clazz
     */
    private static void registerDirectionModel(DirectionModelEnum directionModelEnum, Class<? extends OcrDirectionModel> clazz) {
        directionRegistry.put(directionModelEnum, clazz);
    }
    /**
     * èŽ·å–æ£€æµ‹æ¨¡åž‹ï¼ˆé€šè¿‡é…ç½®ï¼‰
     * @param config
     * @return
     */
    public OcrCommonDetModel getDetModel(OcrDetModelConfig config) {
        if(Objects.isNull(config) || Objects.isNull(config.getModelEnum())){
            throw new OcrException("未配置OCR模型");
        }
        return commonDetModelMap.computeIfAbsent(config.getModelEnum(), k -> {
            return createCommonDetModel(config);
        });
    }
    /**
     * èŽ·å–è¯†åˆ«æ¨¡åž‹ï¼ˆé€šè¿‡é…ç½®ï¼‰
     * @param config
     * @return
     */
    public OcrCommonRecModel getRecModel(OcrRecModelConfig config) {
        if(Objects.isNull(config) || Objects.isNull(config.getRecModelEnum())){
            throw new OcrException("未配置OCR模型");
        }
        return commonRecModelMap.computeIfAbsent(config.getRecModelEnum(), k -> {
            return createCommonRecModel(config);
        });
    }
    /**
     * èŽ·å–æ¨¡åž‹ï¼ˆé€šè¿‡é…ç½®ï¼‰
     * @param config
     * @return
     */
    public OcrDirectionModel getDirectionModel(DirectionModelConfig config) {
        if(Objects.isNull(config) || Objects.isNull(config.getModelEnum())){
            throw new OcrException("未配置OCR模型");
        }
        return directionModelMap.computeIfAbsent(config.getModelEnum(), k -> {
            return createDirectionModel(config);
        });
    }
    /**
     * åˆ›å»ºOCR通用检测模型
     * @param config
     * @return
     */
    private OcrCommonDetModel createCommonDetModel(OcrDetModelConfig config) {
        Class<?> clazz = commonDetRegistry.get(config.getModelEnum());
        if(clazz == null){
            throw new OcrException("Unsupported model");
        }
        OcrCommonDetModel model = null;
        try {
            model = (OcrCommonDetModel) clazz.newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            throw new OcrException(e);
        }
        model.loadModel(config);
        return model;
    }
    /**
     * åˆ›å»ºOCR通用识别模型
     * @param config
     * @return
     */
    private OcrCommonRecModel createCommonRecModel(OcrRecModelConfig config) {
        Class<?> clazz = commonRecRegistry.get(config.getRecModelEnum());
        if(clazz == null){
            throw new OcrException("Unsupported model");
        }
        OcrCommonRecModel model = null;
        try {
            model = (OcrCommonRecModel) clazz.newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            throw new OcrException(e);
        }
        model.loadModel(config);
        return model;
    }
    /**
     * åˆ›å»ºOCR方向分类模型
     * @param config
     * @return
     */
    private OcrDirectionModel createDirectionModel(DirectionModelConfig config) {
        Class<?> clazz = directionRegistry.get(config.getModelEnum());
        if(clazz == null){
            throw new OcrException("Unsupported model");
        }
        OcrDirectionModel model = null;
        try {
            model = (OcrDirectionModel) clazz.newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            throw new OcrException(e);
        }
        model.loadModel(config);
        return model;
    }
    // åˆå§‹åŒ–默认算法
    static {
        //通用-检测模型
        registerCommonDetModel(CommonDetModelEnum.PP_OCR_V5_SERVER_DET_MODEL, OcrCommonDetModelImpl.class);
        registerCommonDetModel(CommonDetModelEnum.PP_OCR_V5_MOBILE_DET_MODEL, OcrCommonDetModelImpl.class);
        registerCommonDetModel(CommonDetModelEnum.PP_OCR_V4_SERVER_DET_MODEL, OcrCommonDetModelImpl.class);
        registerCommonDetModel(CommonDetModelEnum.PP_OCR_V4_MOBILE_DET_MODEL, OcrCommonDetModelImpl.class);
        registerCommonRecModel(CommonRecModelEnum.PP_OCR_V5_SERVER_REC_MODEL, OcrCommonRecModelImpl.class);
        registerCommonRecModel(CommonRecModelEnum.PP_OCR_V5_MOBILE_REC_MODEL, OcrCommonRecModelImpl.class);
        registerCommonRecModel(CommonRecModelEnum.PP_OCR_V4_SERVER_REC_MODEL, OcrCommonRecModelImpl.class);
        registerCommonRecModel(CommonRecModelEnum.PP_OCR_V4_MOBILE_REC_MODEL, OcrCommonRecModelImpl.class);
        registerDirectionModel(DirectionModelEnum.CH_PPOCR_MOBILE_V2_CLS, PPOCRMobileV2ClsModel.class);
        registerDirectionModel(DirectionModelEnum.PP_LCNET_X0_25, PPOCRMobileV2ClsModel.class);
        registerDirectionModel(DirectionModelEnum.PP_LCNET_X1_0, PPOCRMobileV2ClsModel.class);
        log.debug("缓存目录:{}", Config.getCachePath());
    }
}
src/main/java/com/xindao/ocr/smartjavaai/factory/PlateModelFactory.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,164 @@
package com.xindao.ocr.smartjavaai.factory;
import cn.smartjavaai.common.config.Config;
import com.xindao.ocr.smartjavaai.config.PlateDetModelConfig;
import com.xindao.ocr.smartjavaai.config.PlateRecModelConfig;
import com.xindao.ocr.smartjavaai.enums.PlateDetModelEnum;
import com.xindao.ocr.smartjavaai.enums.PlateRecModelEnum;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.plate.CRNNPlateRecModel;
import com.xindao.ocr.smartjavaai.model.plate.PlateDetModel;
import com.xindao.ocr.smartjavaai.model.plate.PlateRecModel;
import com.xindao.ocr.smartjavaai.model.plate.Yolov5PlateDetModel;
import lombok.extern.slf4j.Slf4j;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
/**
 * è½¦ç‰Œè¯†åˆ«æ¨¡åž‹å·¥åŽ‚
 * @author dwj
 */
@Slf4j
public class PlateModelFactory {
    // ä½¿ç”¨ volatile å’ŒåŒé‡æ£€æŸ¥é”å®šæ¥ç¡®ä¿çº¿ç¨‹å®‰å…¨çš„单例模式
    private static volatile PlateModelFactory instance;
    /**
     * æ¨¡åž‹ç¼“å­˜
     */
    private static final ConcurrentHashMap<PlateDetModelEnum, PlateDetModel> detModelMap = new ConcurrentHashMap<>();
    /**
     * æ¨¡åž‹ç¼“å­˜
     */
    private static final ConcurrentHashMap<PlateRecModelEnum, PlateRecModel> recModelMap = new ConcurrentHashMap<>();
    /**
     * æ¨¡åž‹æ³¨å†Œè¡¨
     */
    private static final Map<PlateDetModelEnum, Class<? extends PlateDetModel>> detModelRegistry =
            new ConcurrentHashMap<>();
    /**
     * æ¨¡åž‹æ³¨å†Œè¡¨
     */
    private static final Map<PlateRecModelEnum, Class<? extends PlateRecModel>> recModelRegistry =
            new ConcurrentHashMap<>();
    public static PlateModelFactory getInstance() {
        if (instance == null) {
            synchronized (PlateModelFactory.class) {
                if (instance == null) {
                    instance = new PlateModelFactory();
                }
            }
        }
        return instance;
    }
    /**
     * æ³¨å†Œæ¨¡åž‹
     * @param plateDetModelEnum
     * @param clazz
     */
    private static void registerDetModel(PlateDetModelEnum plateDetModelEnum, Class<? extends PlateDetModel> clazz) {
        detModelRegistry.put(plateDetModelEnum, clazz);
    }
    /**
     * æ³¨å†Œæ¨¡åž‹
     * @param plateRecModelEnum
     * @param clazz
     */
    private static void registerRecModel(PlateRecModelEnum plateRecModelEnum, Class<? extends PlateRecModel> clazz) {
        recModelRegistry.put(plateRecModelEnum, clazz);
    }
    /**
     * èŽ·å–æ¨¡åž‹
     * @param config
     * @return
     */
    public PlateDetModel getDetModel(PlateDetModelConfig config) {
        if(Objects.isNull(config) || Objects.isNull(config.getModelEnum())){
            throw new OcrException("未配置OCR模型");
        }
        return detModelMap.computeIfAbsent(config.getModelEnum(), k -> {
            return createDetModel(config);
        });
    }
    /**
     * èŽ·å–æ¨¡åž‹
     * @param config
     * @return
     */
    public PlateRecModel getRecModel(PlateRecModelConfig config) {
        if(Objects.isNull(config) || Objects.isNull(config.getModelEnum())){
            throw new OcrException("未配置OCR模型");
        }
        return recModelMap.computeIfAbsent(config.getModelEnum(), k -> {
            return createRecModel(config);
        });
    }
    /**
     * åˆ›å»ºæ£€æµ‹æ¨¡åž‹
     * @param config
     * @return
     */
    private PlateDetModel createDetModel(PlateDetModelConfig config) {
        Class<?> clazz = detModelRegistry.get(config.getModelEnum());
        if(clazz == null){
            throw new OcrException("Unsupported model");
        }
        PlateDetModel model = null;
        try {
            model = (PlateDetModel) clazz.newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            throw new OcrException(e);
        }
        model.loadModel(config);
        return model;
    }
    /**
     * åˆ›å»ºè¯†åˆ«æ¨¡åž‹
     * @param config
     * @return
     */
    private PlateRecModel createRecModel(PlateRecModelConfig config) {
        Class<?> clazz = recModelRegistry.get(config.getModelEnum());
        if(clazz == null){
            throw new OcrException("Unsupported model");
        }
        PlateRecModel model = null;
        try {
            model = (PlateRecModel) clazz.newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            throw new OcrException(e);
        }
        model.loadModel(config);
        return model;
    }
    // åˆå§‹åŒ–默认算法
    static {
        registerDetModel(PlateDetModelEnum.YOLOV5, Yolov5PlateDetModel.class);
        registerDetModel(PlateDetModelEnum.YOLOV7, Yolov5PlateDetModel.class);
        registerRecModel(PlateRecModelEnum.PLATE_REC_CRNN, CRNNPlateRecModel.class);
        log.debug("缓存目录:{}", Config.getCachePath());
    }
}
src/main/java/com/xindao/ocr/smartjavaai/factory/TableRecModelFactory.java
对比新文件
@@ -0,0 +1,107 @@
package com.xindao.ocr.smartjavaai.factory;
import cn.smartjavaai.common.config.Config;
import com.xindao.ocr.smartjavaai.config.TableStructureConfig;
import com.xindao.ocr.smartjavaai.enums.TableStructureModelEnum;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.table.CommonTableStructureModel;
import com.xindao.ocr.smartjavaai.model.table.TableStructureModel;
import lombok.extern.slf4j.Slf4j;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
/**
 * OCR è¡¨æ ¼è¯†åˆ«æ¨¡åž‹å·¥åŽ‚
 * @author dwj
 */
@Slf4j
public class TableRecModelFactory {
    // ä½¿ç”¨ volatile å’ŒåŒé‡æ£€æŸ¥é”å®šæ¥ç¡®ä¿çº¿ç¨‹å®‰å…¨çš„单例模式
    private static volatile TableRecModelFactory instance;
    /**
     * æ¨¡åž‹ç¼“å­˜
     */
    private static final ConcurrentHashMap<TableStructureModelEnum, TableStructureModel> tableStructureModelMap = new ConcurrentHashMap<>();
    /**
     * æ¨¡åž‹æ³¨å†Œè¡¨
     */
    private static final Map<TableStructureModelEnum, Class<? extends TableStructureModel>> tableStructureRegistry =
            new ConcurrentHashMap<>();
    public static TableRecModelFactory getInstance() {
        if (instance == null) {
            synchronized (TableRecModelFactory.class) {
                if (instance == null) {
                    instance = new TableRecModelFactory();
                }
            }
        }
        return instance;
    }
    /**
     * æ³¨å†Œæ¨¡åž‹
     * @param tableStructureModelEnum
     * @param clazz
     */
    private static void registerTableStructureModel(TableStructureModelEnum tableStructureModelEnum, Class<? extends TableStructureModel> clazz) {
        tableStructureRegistry.put(tableStructureModelEnum, clazz);
    }
    /**
     * èŽ·å–æ¨¡åž‹ï¼ˆé€šè¿‡é…ç½®ï¼‰
     * @param config
     * @return
     */
    public TableStructureModel getTableStructureModel(TableStructureConfig config) {
        if(Objects.isNull(config) || Objects.isNull(config.getModelEnum())){
            throw new OcrException("未配置OCR模型");
        }
        return tableStructureModelMap.computeIfAbsent(config.getModelEnum(), k -> {
            return createTableStructureModel(config);
        });
    }
    /**
     * åˆ›å»ºæ¨¡åž‹
     * @param config
     * @return
     */
    private TableStructureModel createTableStructureModel(TableStructureConfig config) {
        Class<?> clazz = tableStructureRegistry.get(config.getModelEnum());
        if(clazz == null){
            throw new OcrException("Unsupported model");
        }
        TableStructureModel model = null;
        try {
            model = (TableStructureModel) clazz.newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            throw new OcrException(e);
        }
        model.loadModel(config);
        return model;
    }
    // åˆå§‹åŒ–默认算法
    static {
        registerTableStructureModel(TableStructureModelEnum.SLANET, CommonTableStructureModel.class);
        registerTableStructureModel(TableStructureModelEnum.SLANET_PLUS, CommonTableStructureModel.class);
        log.debug("缓存目录:{}", Config.getCachePath());
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/detect/OcrCommonDetModel.java
对比新文件
@@ -0,0 +1,104 @@
package com.xindao.ocr.smartjavaai.model.common.detect;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import ai.djl.ndarray.NDList;
import com.xindao.ocr.smartjavaai.config.OcrDetModelConfig;
import com.xindao.ocr.smartjavaai.entity.OcrBox;
import org.apache.commons.pool2.impl.GenericObjectPool;
import java.awt.image.BufferedImage;
import java.util.List;
/**
 * General-purpose OCR text detection model.
 *
 * <p>Only {@link #loadModel(OcrDetModelConfig)} is abstract. Every other operation has a default
 * body that throws {@link UnsupportedOperationException}, so an implementation overrides just
 * the operations it supports.
 *
 * @author dwj
 */
public interface OcrCommonDetModel extends AutoCloseable{
    /**
     * Loads the model.
     * @param config detection model configuration
     */
    void loadModel(OcrDetModelConfig config); // load the model
    /**
     * Detects text in an image file.
     * @param imagePath path of the image
     * @return the detected text boxes
     * @throws UnsupportedOperationException unless overridden
     */
    default List<OcrBox> detect(String imagePath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects text in a BufferedImage.
     * @param image BufferedImage
     * @return the detected text boxes
     * @throws UnsupportedOperationException unless overridden
     */
    default List<OcrBox> detect(BufferedImage image) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects text in an encoded image.
     * @param imageData image as a byte array
     * @return the detected text boxes
     * @throws UnsupportedOperationException unless overridden
     */
    default List<OcrBox> detect(byte[] imageData) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects text in a DJL image.
     * @param image DJL Image
     * @return the detected text boxes
     * @throws UnsupportedOperationException unless overridden
     */
    default List<OcrBox> detect(Image image){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects text and draws the result.
     * @param imagePath input image path (including the file name)
     * @param outputPath output image path (including the file name)
     * @throws UnsupportedOperationException unless overridden
     */
    default void detectAndDraw(String imagePath, String outputPath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects text and draws the result.
     * @param sourceImage image to run detection on
     * @return an image with the detection result drawn
     * @throws UnsupportedOperationException unless overridden
     */
    default BufferedImage detectAndDraw(BufferedImage sourceImage){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects text in a batch of images.
     * @param imageList BufferedImage
     * @return one list of detected text boxes per input image
     * @throws UnsupportedOperationException unless overridden
     */
    default List<List<OcrBox>> batchDetect(List<BufferedImage> imageList) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects text in a batch of images.
     * @param imageList DJL Image
     * @return one list of detected text boxes per input image
     * @throws UnsupportedOperationException unless overridden
     */
    default List<List<OcrBox>> batchDetectDJLImage(List<Image> imageList){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the predictor pool used by this model.
     * @return the predictor pool
     * @throws UnsupportedOperationException unless overridden
     */
    default GenericObjectPool<Predictor<Image, NDList>> getPool(){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/detect/OcrCommonDetModelImpl.java
对比新文件
@@ -0,0 +1,244 @@
package com.xindao.ocr.smartjavaai.model.common.detect;
import ai.djl.MalformedModelException;
import ai.djl.engine.Engine;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.ImageFactory;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ModelNotFoundException;
import ai.djl.repository.zoo.ModelZoo;
import ai.djl.repository.zoo.ZooModel;
import cn.smartjavaai.common.pool.PredictorFactory;
import cn.smartjavaai.common.utils.FileUtils;
import cn.smartjavaai.common.utils.ImageUtils;
import cn.smartjavaai.common.utils.OpenCVUtils;
import com.xindao.ocr.smartjavaai.config.OcrDetModelConfig;
import com.xindao.ocr.smartjavaai.entity.OcrBox;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.common.detect.criteria.OcrCommonDetCriterialFactory;
import com.xindao.ocr.smartjavaai.utils.OcrUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.opencv.core.Mat;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
/**
 * ocr通用检测模型实现类
 * @author dwj
 */
@Slf4j
public class OcrCommonDetModelImpl implements OcrCommonDetModel{
    private GenericObjectPool<Predictor<Image, NDList>> detPredictorPool;
    private ZooModel<Image, NDList> detectionModel;
    private OcrDetModelConfig config;
    @Override
    public void loadModel(OcrDetModelConfig config){
        if(StringUtils.isBlank(config.getDetModelPath())){
            throw new OcrException("modelPath is null");
        }
        this.config = config;
        //初始化 æ£€æµ‹Criteria
        Criteria<Image, NDList> detCriteria = OcrCommonDetCriterialFactory.createCriteria(config);
        try{
            detectionModel = ModelZoo.loadModel(detCriteria);
            // åˆ›å»ºæ± å­ï¼šæ¯ä¸ªçº¿ç¨‹ç‹¬äº« Predictor
            this.detPredictorPool = new GenericObjectPool<>(new PredictorFactory<>(detectionModel));
            int predictorPoolSize = config.getPredictorPoolSize();
            if(config.getPredictorPoolSize() <= 0){
                predictorPoolSize = Runtime.getRuntime().availableProcessors(); // é»˜è®¤ç­‰äºŽCPU核心数
            }
            detPredictorPool.setMaxTotal(predictorPoolSize);
            log.debug("当前设备: " + detectionModel.getNDManager().getDevice());
            log.debug("当前引擎: " + Engine.getInstance().getEngineName());
            log.debug("模型推理器线程池最大数量: " + predictorPoolSize);
        } catch (IOException | ModelNotFoundException | MalformedModelException e) {
            throw new OcrException("检测模型加载失败", e);
        }
    }
    @Override
    public List<OcrBox> detect(String imagePath){
        if(!FileUtils.isFileExists(imagePath)){
            throw new OcrException("图像文件不存在");
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
        } catch (IOException e) {
            throw new OcrException("无效的图片", e);
        }
        List<OcrBox> ocrBoxList = detect(img);
        ((Mat)img.getWrappedImage()).release();
        return ocrBoxList;
    }
    @Override
    public List<OcrBox> detect(Image image){
        List<Image> imageList = Collections.singletonList(image);
        List<List<OcrBox>> result = batchDetectDJLImage(imageList);
        return result.get(0);
    }
    @Override
    public void detectAndDraw(String imagePath, String outputPath) {
        if(!FileUtils.isFileExists(imagePath)){
            throw new OcrException("图像文件不存在");
        }
        try {
            Image img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            List<OcrBox> boxList = detect(img);
            if(Objects.isNull(boxList) || boxList.isEmpty()){
                throw new OcrException("未检测到文字");
            }
            OcrUtils.drawRect((Mat)img.getWrappedImage(), boxList);
            Path output = Paths.get(outputPath);
            log.debug("Saving to {}", output.toAbsolutePath().toString());
            img.save(Files.newOutputStream(output), "png");
            ((Mat) img.getWrappedImage()).release();
        } catch (IOException e) {
            throw new OcrException(e);
        }
    }
    @Override
    public List<OcrBox> detect(BufferedImage image) {
        if(!ImageUtils.isImageValid(image)){
            throw new OcrException("图像无效");
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(image));
        List<OcrBox> ocrBoxList = detect(img);
        ((Mat)img.getWrappedImage()).release();
        return ocrBoxList;
    }
    @Override
    public List<OcrBox> detect(byte[] imageData) {
        if(Objects.isNull(imageData)){
            throw new OcrException("图像无效");
        }
        try {
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageData));
            return detect(image);
        } catch (IOException e) {
            throw new OcrException("错误的图像", e);
        }
    }
    @Override
    public BufferedImage detectAndDraw(BufferedImage sourceImage) {
        if(!ImageUtils.isImageValid(sourceImage)){
            throw new OcrException("图像无效");
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(sourceImage));
        List<OcrBox> ocrBoxList = detect(img);
        if(Objects.isNull(ocrBoxList) || ocrBoxList.isEmpty()){
            throw new OcrException("未检测到文字");
        }
        OcrUtils.drawRect((Mat)img.getWrappedImage(), ocrBoxList);
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            // è°ƒç”¨ save æ–¹æ³•å°† Image å†™å…¥å­—节流
            img.save(outputStream, "png");
            // å°†å­—节流转换为 BufferedImage
            byte[] imageBytes = outputStream.toByteArray();
            return ImageIO.read(new ByteArrayInputStream(imageBytes));
        } catch (IOException e) {
            throw new OcrException("导出图片失败", e);
        } finally {
            if (img != null){
                ((Mat) img.getWrappedImage()).release();
            }
        }
    }
    @Override
    public List<List<OcrBox>> batchDetect(List<BufferedImage> imageList) {
        List<Image> djlImageList = new ArrayList<>(imageList.size());
        try {
            for (BufferedImage bufferedImage : imageList) {
                djlImageList.add(ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(bufferedImage)));
            }
            return batchDetectDJLImage(djlImageList);
        } catch (Exception e) {
            throw new OcrException(e);
        } finally {
            djlImageList.forEach(image -> ((Mat)image.getWrappedImage()).release());
        }
    }
    @Override
    public List<List<OcrBox>> batchDetectDJLImage(List<Image> imageList) {
        if(!ImageUtils.isAllImageSizeEqual(imageList)){
            throw new OcrException("图片尺寸不一致");
        }
        Predictor<Image, NDList> predictor = null;
        try (NDManager manager = NDManager.newBaseManager()) {
            predictor = detPredictorPool.borrowObject();
            List<NDList> result = predictor.batchPredict(imageList);
            result.forEach(ndList -> ndList.attach(manager));
            return OcrUtils.convertToOcrBox(result);
        } catch (Exception e) {
            throw new OcrException("OCR检测错误", e);
        }finally {
            if (predictor != null) {
                try {
                    detPredictorPool.returnObject(predictor); //归还
                } catch (Exception e) {
                    log.warn("归还Predictor失败", e);
                    try {
                        predictor.close(); // å½’还失败才销毁
                    } catch (Exception ex) {
                        log.error("关闭Predictor失败", ex);
                    }
                }
            }
        }
    }
    @Override
    public GenericObjectPool<Predictor<Image, NDList>> getPool() {
        return detPredictorPool;
    }
    @Override
    public void close() throws Exception {
        try {
            if (detPredictorPool != null) {
                detPredictorPool.close();
            }
        } catch (Exception e) {
            log.warn("关闭 predictorPool å¤±è´¥", e);
        }
        try {
            if (detectionModel != null) {
                detectionModel.close();
            }
        } catch (Exception e) {
            log.warn("关闭 model å¤±è´¥", e);
        }
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/detect/criteria/OcrCommonDetCriterialFactory.java
对比新文件
@@ -0,0 +1,53 @@
package com.xindao.ocr.smartjavaai.model.common.detect.criteria;
import ai.djl.Device;
import ai.djl.modality.cv.Image;
import ai.djl.ndarray.NDList;
import ai.djl.repository.zoo.Criteria;
import ai.djl.training.util.ProgressBar;
import cn.smartjavaai.common.enums.DeviceEnum;
import com.xindao.ocr.smartjavaai.config.OcrDetModelConfig;
import com.xindao.ocr.smartjavaai.enums.CommonDetModelEnum;
import com.xindao.ocr.smartjavaai.model.common.detect.translator.PPOCRDetTranslator;
import org.apache.commons.lang3.StringUtils;
import java.nio.file.Paths;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
/**
 * @author dwj
 * @date 2025/7/8
 */
public class OcrCommonDetCriterialFactory {
    public static Criteria<Image, NDList> createCriteria(OcrDetModelConfig config) {
        Device device = null;
        if(!Objects.isNull(config.getDevice())){
            device = config.getDevice() == DeviceEnum.CPU ? Device.cpu() : Device.gpu(config.getGpuId());
        }
        Criteria<Image, NDList> criteria = null;
        ConcurrentHashMap params = new ConcurrentHashMap<String, String>();
        params.putAll(config.getCustomParams());
        if(StringUtils.isNotBlank(config.getBatchifier())){
            params.put("batchifier", config.getBatchifier());
        }
        if(config.getModelEnum() == CommonDetModelEnum.PP_OCR_V5_SERVER_DET_MODEL ||
                config.getModelEnum() == CommonDetModelEnum.PP_OCR_V5_MOBILE_DET_MODEL ||
                config.getModelEnum() == CommonDetModelEnum.PP_OCR_V4_SERVER_DET_MODEL ||
                config.getModelEnum() == CommonDetModelEnum.PP_OCR_V4_MOBILE_DET_MODEL
        ){
            criteria =
                    Criteria.builder()
                            .optEngine("OnnxRuntime")
                            .setTypes(Image.class, NDList.class)
                            .optModelPath(Paths.get(config.getDetModelPath()))
                            .optTranslator(new PPOCRDetTranslator(params))
                            .optDevice(device)
                            .optProgress(new ProgressBar())
                            .build();
        }
        return criteria;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/detect/translator/PPOCRDetTranslator.java
对比新文件
@@ -0,0 +1,536 @@
package com.xindao.ocr.smartjavaai.model.common.detect.translator;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.util.NDImageUtils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDArrays;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.index.NDIndex;
import ai.djl.ndarray.types.DataType;
import ai.djl.ndarray.types.Shape;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import com.xindao.ocr.smartjavaai.opencv.OcrNDArrayUtils;
import org.opencv.core.*;
import org.opencv.imgproc.Imgproc;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * æ–‡å­—检测前后处理
 *
 * @author Calvin
 * @mail 179209347@qq.com
 * @website www.aias.top
 */
public class PPOCRDetTranslator implements Translator<Image, NDList> {
    // det_algorithm == "DB"
    // Binarization threshold applied to the prediction map in processOutput.
    private final float thresh = 0.3f;
    // When true, processOutput dilates the binarized mask before extracting boxes.
    private final boolean use_dilation = false;
    private final String score_mode = "fast";
    private final String box_type = "quad";
    // Side-length limit of the image used for detection (defaults to 960).
    private final int limit_side_len;
    // Maximum number of text box candidates produced (defaults to 1000).
    private final int max_candidates;
    // Minimum size threshold of a text box (defaults to 3).
    private final int min_size;
    // Score threshold of a text box (defaults to 0.6).
    private final float box_thresh;
    /**
     * Controls the size of the text boxes during detection post-processing. The default is 1.6;
     * try 2.5 or larger to enlarge the boxes, or lower it if the boxes are not tight enough.
     * If the boxes hug the text too closely or are too large, adjust db_unclip_ratio: a larger
     * value expands the boxes and a smaller value shrinks them.
     */
    private final float unclip_ratio;
    // NOTE(review): the four fields below are mutable per-image state. They are read in
    // processOutput / clip_det_res and presumably written while pre-processing the input (not
    // visible here). If one translator instance is shared by concurrently used predictors this
    // would be a data race - confirm.
    // Scale factors that processOutput divides the box y / x coordinates by.
    private float ratio_h;
    private float ratio_w;
    // Image bounds that clip_det_res clamps box coordinates to.
    private int img_height;
    private int img_width;
    // Batchifier name taken from the "batchifier" argument (defaults to "stack").
    private String batchifier;
    public PPOCRDetTranslator(Map<String, ?> arguments) {
        limit_side_len =
                arguments.containsKey("limit_side_len")
                        ? Integer.parseInt(arguments.get("limit_side_len").toString())
                        : 960;
        max_candidates =
                arguments.containsKey("max_candidates")
                        ? Integer.parseInt(arguments.get("max_candidates").toString())
                        : 1000;
        min_size =
                arguments.containsKey("min_size")
                        ? Integer.parseInt(arguments.get("min_size").toString())
                        : 3;
        box_thresh =
                arguments.containsKey("box_thresh")
                        ? Float.parseFloat(arguments.get("box_thresh").toString())
                        : 0.6f; // 0.5f
        unclip_ratio =
                arguments.containsKey("unclip_ratio")
                        ? Float.parseFloat(arguments.get("unclip_ratio").toString())
                        : 1.6f;
        batchifier =  arguments.containsKey("batchifier")
                ? arguments.get("batchifier").toString()
                : "stack";
    }
    @Override
    public NDList processOutput(TranslatorContext ctx, NDList list) {
        NDManager manager = ctx.getNDManager();
        NDArray pred = list.get(0);
        pred = pred.squeeze();
        NDArray segmentation = pred.gt(thresh);   // thresh=0.3 .mul(255f)
        segmentation = segmentation.toType(DataType.UINT8, true);
        Shape shape = segmentation.getShape();
        int rows = (int) shape.get(0);
        int cols = (int) shape.get(1);
        Mat newMask = new Mat();
        if (this.use_dilation) {
            Mat mask = new Mat();
            //convert from NDArray to Mat
            Mat srcMat = OcrNDArrayUtils.uint8NDArrayToMat(segmentation);
            // size è¶Šå°ï¼Œè…èš€çš„单位越小,图片越接近原图
            // Mat dilation_kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(2, 2));
            Mat dilation_kernel = OcrNDArrayUtils.uint8ArrayToMat(new byte[][]{{1, 1}, {1, 1}});
            /**
             * è†¨èƒ€è¯´æ˜Žï¼š å›¾åƒçš„一部分区域与指定的核进行卷积, æ±‚核的最`大`值并赋值给指定区域。 è†¨èƒ€å¯ä»¥ç†è§£ä¸ºå›¾åƒä¸­`高亮区域`的'领域扩大'。
             * æ„æ€æ˜¯é«˜äº®éƒ¨åˆ†ä¼šä¾µèš€ä¸æ˜¯é«˜äº®çš„部分,使高亮部分越来越多。
             */
            Imgproc.dilate(srcMat, mask, dilation_kernel);
            //destination Matrix
            Scalar scalar = new Scalar(255);
            Core.multiply(mask, scalar, newMask);
            // release Mat
            mask.release();
            srcMat.release();
            dilation_kernel.release();
        } else {
            Mat srcMat = OcrNDArrayUtils.uint8NDArrayToMat(segmentation);
            //destination Matrix
            Scalar scalar = new Scalar(255);
            Core.multiply(srcMat, scalar, newMask);
            // release Mat
            srcMat.release();
        }
        NDArray boxes = boxes_from_bitmap(manager, pred, newMask);
        //boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
        NDArray boxes1 = boxes.get(":, :, 0").div(ratio_w);
        boxes.set(new NDIndex(":, :, 0"), boxes1);
        //boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
        NDArray boxes2 = boxes.get(":, :, 1").div(ratio_h);
        boxes.set(new NDIndex(":, :, 1"), boxes2);
        NDList dt_boxes = this.filter_tag_det_res(boxes);
        dt_boxes.detach();
        // release Mat
        newMask.release();
        return dt_boxes;
    }
    private NDList filter_tag_det_res(NDArray dt_boxes) {
        NDList boxesList = new NDList();
        int num = (int) dt_boxes.getShape().get(0);
        for (int i = 0; i < num; i++) {
            NDArray box = dt_boxes.get(i);
            box = order_points_clockwise(box);
            box = clip_det_res(box);
            float[] box0 = box.get(0).toFloatArray();
            float[] box1 = box.get(1).toFloatArray();
            float[] box3 = box.get(3).toFloatArray();
            int rect_width = (int) Math.sqrt(Math.pow(box1[0] - box0[0], 2) + Math.pow(box1[1] - box0[1], 2));
            int rect_height = (int) Math.sqrt(Math.pow(box3[0] - box0[0], 2) + Math.pow(box3[1] - box0[1], 2));
            if (rect_width <= 3 || rect_height <= 3)
                continue;
            boxesList.add(box);
        }
        return boxesList;
    }
    private NDArray clip_det_res(NDArray points) {
        for (int i = 0; i < points.getShape().get(0); i++) {
            int value = Math.max((int) points.get(i, 0).toFloatArray()[0], 0);
            value = Math.min(value, img_width - 1);
            points.set(new NDIndex(i + ",0"), value);
            value = Math.max((int) points.get(i, 1).toFloatArray()[0], 0);
            value = Math.min(value, img_height - 1);
            points.set(new NDIndex(i + ",1"), value);
        }
        return points;
    }
    /**
     * sort the points based on their x-coordinates
     * é¡ºæ—¶é’ˆ
     *
     * @param pts
     * @return
     */
    private NDArray order_points_clockwise(NDArray pts) {
        NDList list = new NDList();
        long[] indexes = pts.get(":, 0").argSort().toLongArray();
        // grab the left-most and right-most points from the sorted
        // x-roodinate points
        Shape s1 = pts.getShape();
        NDArray leftMost1 = pts.get(indexes[0] + ",:");
        NDArray leftMost2 = pts.get(indexes[1] + ",:");
        NDArray leftMost = leftMost1.concat(leftMost2).reshape(2, 2);
        NDArray rightMost1 = pts.get(indexes[2] + ",:");
        NDArray rightMost2 = pts.get(indexes[3] + ",:");
        NDArray rightMost = rightMost1.concat(rightMost2).reshape(2, 2);
        // now, sort the left-most coordinates according to their
        // y-coordinates so we can grab the top-left and bottom-left
        // points, respectively
        indexes = leftMost.get(":, 1").argSort().toLongArray();
        NDArray lt = leftMost.get(indexes[0] + ",:");
        NDArray lb = leftMost.get(indexes[1] + ",:");
        indexes = rightMost.get(":, 1").argSort().toLongArray();
        NDArray rt = rightMost.get(indexes[0] + ",:");
        NDArray rb = rightMost.get(indexes[1] + ",:");
        list.add(lt);
        list.add(rt);
        list.add(rb);
        list.add(lb);
        NDArray rect = NDArrays.concat(list).reshape(4, 2);
        return rect;
    }
    /**
     * Get boxes from the binarized image predicted by DB
     *
     * @param manager
     * @param pred    the binarized image predicted by DB.
     * @param bitmap  new 'pred' after threshold filtering.
     */
    private NDArray boxes_from_bitmap(NDManager manager, NDArray pred, Mat bitmap) {
        int dest_height = (int) pred.getShape().get(0);
        int dest_width = (int) pred.getShape().get(1);
        int height = bitmap.rows();
        int width = bitmap.cols();
        List<MatOfPoint> contours = new ArrayList<>();
        Mat hierarchy = new Mat();
        // å¯»æ‰¾è½®å»“
        Imgproc.findContours(
                bitmap,
                contours,
                hierarchy,
                Imgproc.RETR_LIST,
                Imgproc.CHAIN_APPROX_SIMPLE);
        int num_contours = Math.min(contours.size(), max_candidates);
        NDList boxList = new NDList();
        float[] scores = new float[num_contours];
        for (int index = 0; index < num_contours; index++) {
            MatOfPoint contour = contours.get(index);
            MatOfPoint2f newContour = new MatOfPoint2f(contour.toArray());
            float[][] pointsArr = new float[4][2];
            int sside = get_mini_boxes(newContour, pointsArr);
            if (sside < this.min_size)
                continue;
            NDArray points = manager.create(pointsArr);
            float score = box_score_fast(manager, pred, points);
            if (score < this.box_thresh)
                continue;
            NDArray box = unclip(manager, points); // TODO get_mini_boxes(box)
            // box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
            NDArray boxes1 = box.get(":,0").div(width).mul(dest_width).round().clip(0, dest_width);
            box.set(new NDIndex(":, 0"), boxes1);
            // box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
            NDArray boxes2 = box.get(":,1").div(height).mul(dest_height).round().clip(0, dest_height);
            box.set(new NDIndex(":, 1"), boxes2);
            boxList.add(box);
            scores[index] = score;
            // release memory
            contour.release();
            newContour.release();
        }
        NDArray boxes = NDArrays.stack(boxList);
        // release
        hierarchy.release();
        return boxes;
    }
    /**
     * Expands a four-point box outward and returns the enlarged box.
     *
     * <p>The corners are first reordered by {@code order_points_clockwise}; the flattened
     * result is then read as (lt, rt, rb, lb). Every corner is displaced by a combination
     * of {@code delta_x} and {@code delta_y}, a vector whose length equals the shorter
     * side of the box and whose direction follows the slope {@code k} of the lt-rt edge.
     *
     * <p>NOTE(review): no {@code unclip_ratio} value is read in this method; the amount
     * of expansion depends only on the box's own side lengths.
     * NOTE(review): when lt and rt have the same x, {@code k} is a float division by zero
     * (infinite or NaN) and the computed offsets become NaN — confirm callers never pass
     * such a box.
     *
     * @param manager NDManager used to allocate the returned array.
     * @param points  The predicted box (8 floats read as four x/y pairs).
     * @return the expanded box, reshaped to 4x2
     */
    private NDArray unclip(NDManager manager, NDArray points) {
        points = order_points_clockwise(points);
        float[] pointsArr = points.toFloatArray();
        // Flattened layout after ordering: [lt.x, lt.y, rt.x, rt.y, rb.x, rb.y, lb.x, lb.y].
        float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2);
        float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8);
        float[] rt = java.util.Arrays.copyOfRange(pointsArr, 2, 4);
        float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6);
        float width = distance(lt, rt);
        float height = distance(lt, lb);
        if (width > height) {
            // Wide box: the expansion distance is the (shorter) height.
            float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b
            float delta_dis = height;
            // (delta_x, delta_y) has length delta_dis and slope |k|.
            float delta_x = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1));
            float delta_y = Math.abs(k * delta_x);
            if (k > 0) {
                pointsArr[0] = lt[0] - delta_x + delta_y;
                pointsArr[1] = lt[1] - delta_y - delta_x;
                pointsArr[2] = rt[0] + delta_x + delta_y;
                pointsArr[3] = rt[1] + delta_y - delta_x;
                pointsArr[4] = rb[0] + delta_x - delta_y;
                pointsArr[5] = rb[1] + delta_y + delta_x;
                pointsArr[6] = lb[0] - delta_x - delta_y;
                pointsArr[7] = lb[1] - delta_y + delta_x;
            } else {
                pointsArr[0] = lt[0] - delta_x - delta_y;
                pointsArr[1] = lt[1] + delta_y - delta_x;
                pointsArr[2] = rt[0] + delta_x - delta_y;
                pointsArr[3] = rt[1] - delta_y - delta_x;
                pointsArr[4] = rb[0] + delta_x + delta_y;
                pointsArr[5] = rb[1] - delta_y + delta_x;
                pointsArr[6] = lb[0] - delta_x + delta_y;
                pointsArr[7] = lb[1] + delta_y + delta_x;
            }
        } else {
            // Tall (or square) box: the expansion distance is the width, and the roles of
            // delta_x / delta_y are swapped relative to the branch above.
            float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b
            float delta_dis = width;
            float delta_y = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1));
            float delta_x = Math.abs(k * delta_y);
            if (k > 0) {
                pointsArr[0] = lt[0] + delta_x - delta_y;
                pointsArr[1] = lt[1] - delta_y - delta_x;
                pointsArr[2] = rt[0] + delta_x + delta_y;
                pointsArr[3] = rt[1] - delta_y + delta_x;
                pointsArr[4] = rb[0] - delta_x + delta_y;
                pointsArr[5] = rb[1] + delta_y + delta_x;
                pointsArr[6] = lb[0] - delta_x - delta_y;
                pointsArr[7] = lb[1] + delta_y - delta_x;
            } else {
                pointsArr[0] = lt[0] - delta_x - delta_y;
                pointsArr[1] = lt[1] - delta_y + delta_x;
                pointsArr[2] = rt[0] - delta_x + delta_y;
                pointsArr[3] = rt[1] - delta_y - delta_x;
                pointsArr[4] = rb[0] + delta_x + delta_y;
                pointsArr[5] = rb[1] + delta_y - delta_x;
                pointsArr[6] = lb[0] + delta_x - delta_y;
                pointsArr[7] = lb[1] + delta_y + delta_x;
            }
        }
        // Re-wrap the modified coordinates as a 4x2 array owned by the given manager.
        points = manager.create(pointsArr).reshape(4, 2);
        return points;
    }
    private float distance(float[] point1, float[] point2) {
        float disX = point1[0] - point2[0];
        float disY = point1[1] - point2[1];
        float dis = (float) Math.sqrt(disX * disX + disY * disY);
        return dis;
    }
    /**
     * Get boxes from the contour or box.
     *
     * @param contour   The predicted contour.
     * @param pointsArr The predicted box.
     * @return smaller side of box
     */
    private int get_mini_boxes(MatOfPoint2f contour, float[][] pointsArr) {
        // https://blog.csdn.net/qq_37385726/article/details/82313558
        // bounding_box[1] - rect è¿”回矩形的长和宽
        RotatedRect rect = Imgproc.minAreaRect(contour);
        Mat points = new Mat();
        Imgproc.boxPoints(rect, points);
        float[][] fourPoints = new float[4][2];
        for (int row = 0; row < 4; row++) {
            fourPoints[row][0] = (float) points.get(row, 0)[0];
            fourPoints[row][1] = (float) points.get(row, 1)[0];
        }
        float[] tmpPoint = new float[2];
        for (int i = 0; i < 4; i++) {
            for (int j = i + 1; j < 4; j++) {
                if (fourPoints[j][0] < fourPoints[i][0]) {
                    tmpPoint[0] = fourPoints[i][0];
                    tmpPoint[1] = fourPoints[i][1];
                    fourPoints[i][0] = fourPoints[j][0];
                    fourPoints[i][1] = fourPoints[j][1];
                    fourPoints[j][0] = tmpPoint[0];
                    fourPoints[j][1] = tmpPoint[1];
                }
            }
        }
        int index_1 = 0;
        int index_2 = 1;
        int index_3 = 2;
        int index_4 = 3;
        if (fourPoints[1][1] > fourPoints[0][1]) {
            index_1 = 0;
            index_4 = 1;
        } else {
            index_1 = 1;
            index_4 = 0;
        }
        if (fourPoints[3][1] > fourPoints[2][1]) {
            index_2 = 2;
            index_3 = 3;
        } else {
            index_2 = 3;
            index_3 = 2;
        }
        pointsArr[0] = fourPoints[index_1];
        pointsArr[1] = fourPoints[index_2];
        pointsArr[2] = fourPoints[index_3];
        pointsArr[3] = fourPoints[index_4];
        int height = rect.boundingRect().height;
        int width = rect.boundingRect().width;
        int sside = Math.min(height, width);
        // release
        points.release();
        return sside;
    }
    /**
     * Calculate the score of box.
     *
     * @param bitmap The binarized image predicted by DB.
     * @param points The predicted box
     * @return
     */
    private float box_score_fast(NDManager manager, NDArray bitmap, NDArray points) {
        NDArray box = points.get(":");
        long h = bitmap.getShape().get(0);
        long w = bitmap.getShape().get(1);
        // xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
        int xmin = box.get(":, 0").min().floor().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0];
        int xmax = box.get(":, 0").max().ceil().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0];
        int ymin = box.get(":, 1").min().floor().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0];
        int ymax = box.get(":, 1").max().ceil().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0];
        NDArray mask = manager.zeros(new Shape(ymax - ymin + 1, xmax - xmin + 1), DataType.UINT8);
        box.set(new NDIndex(":, 0"), box.get(":, 0").sub(xmin));
        box.set(new NDIndex(":, 1"), box.get(":, 1").sub(ymin));
        //mask - convert from NDArray to Mat
        Mat maskMat = OcrNDArrayUtils.uint8NDArrayToMat(mask);
        //mask - convert from NDArray to Mat - 4 rows, 2 cols
        Mat boxMat = OcrNDArrayUtils.floatNDArrayToMat(box, CvType.CV_32S);
//        boxMat.reshape(1, new int[]{1, 4, 2});
        List<MatOfPoint> pts = new ArrayList<>();
        MatOfPoint matOfPoint = OcrNDArrayUtils.matToMatOfPoint(boxMat); // new MatOfPoint(boxMat);
        pts.add(matOfPoint);
        Imgproc.fillPoly(maskMat, pts, new Scalar(1));
        NDArray subBitMap = bitmap.get(ymin + ":" + (ymax + 1) + "," + xmin + ":" + (xmax + 1));
        Mat bitMapMat = OcrNDArrayUtils.floatNDArrayToMat(subBitMap);
        Scalar score = Core.mean(bitMapMat, maskMat);
        float scoreValue = (float) score.val[0];
        // release
        maskMat.release();
        boxMat.release();
        bitMapMat.release();
        return scoreValue;
    }
    @Override
    public NDList processInput(TranslatorContext ctx, Image input) {
        NDArray img = input.toNDArray(ctx.getNDManager());
        int h = input.getHeight();
        int w = input.getWidth();
        img_height = h;
        img_width = w;
        // limit the max side
        float ratio = 1.0f;
        if (Math.max(h, w) > limit_side_len) {
            if (h > w) {
                ratio = (float) limit_side_len / (float) h;
            } else {
                ratio = (float) limit_side_len / (float) w;
            }
        }
        int resize_h = (int) (h * ratio);
        int resize_w = (int) (w * ratio);
        resize_h = Math.round((float) resize_h / 32f) * 32;
        resize_w = Math.round((float) resize_w / 32f) * 32;
        ratio_h = resize_h / (float) h;
        ratio_w = resize_w / (float) w;
        img = NDImageUtils.resize(img, resize_w, resize_h);
        img = NDImageUtils.toTensor(img);
        img =
                NDImageUtils.normalize(
                        img,
                        new float[]{0.485f, 0.456f, 0.406f},
                        new float[]{0.229f, 0.224f, 0.225f});
//        img = img.expandDims(0);
        return new NDList(img);
    }
    @Override
    public Batchifier getBatchifier() {
        return Batchifier.fromString(batchifier);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/direction/OcrDirectionModel.java
对比新文件
@@ -0,0 +1,112 @@
package com.xindao.ocr.smartjavaai.model.common.direction;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import com.xindao.ocr.smartjavaai.config.DirectionModelConfig;
import com.xindao.ocr.smartjavaai.entity.DirectionInfo;
import com.xindao.ocr.smartjavaai.entity.OcrBox;
import com.xindao.ocr.smartjavaai.entity.OcrItem;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.opencv.core.Mat;
import java.awt.image.BufferedImage;
import java.util.List;
/**
 * OCR text direction classification model.
 *
 * <p>Only {@link #loadModel(DirectionModelConfig)} is abstract. Every other method has a
 * default implementation that throws {@link UnsupportedOperationException}, so an
 * implementation overrides just the operations it supports.
 *
 * @author dwj
 */
public interface OcrDirectionModel extends AutoCloseable{
    /**
     * Sets the text detection model.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param detModel text detection model
     */
    default void setTextDetModel(OcrCommonDetModel detModel){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the text detection model.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @return the text detection model
     */
    default OcrCommonDetModel getTextDetModel(){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Loads the model.
     *
     * @param config direction model configuration
     */
    void loadModel(DirectionModelConfig config); // load the model
    /**
     * Text direction detection.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param imagePath image path
     * @return detected items
     */
    default List<OcrItem> detect(String imagePath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Text direction detection.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param image BufferedImage
     * @return detected items
     */
    default List<OcrItem> detect(BufferedImage image) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Text direction detection.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param imageData image byte array
     * @return detected items
     */
    default List<OcrItem> detect(byte[] imageData) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Text direction detection.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param image DJL image
     * @return detected items
     */
    default List<OcrItem> detect(Image image) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Text direction detection (based on existing detection results).
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param boxList detected text boxes
     * @param srcMat  source image
     * @return detected items
     */
    default List<OcrItem> detect(List<OcrBox> boxList, Mat srcMat) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Batch variant of {@link #detect(List, Mat)}.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param boxList    one list of text boxes per image
     * @param srcMatList source images
     * @return one list of detected items per image
     */
    default List<List<OcrItem>> batchDetect(List<List<OcrBox>> boxList, List<Mat> srcMatList) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects and draws the result.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param imagePath  input image path (including the file name)
     * @param outputPath output image path (including the file name)
     */
    default void detectAndDraw(String imagePath, String outputPath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects and draws the result.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @param sourceImage source image
     * @return image with the result drawn
     */
    default BufferedImage detectAndDraw(BufferedImage sourceImage){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the predictor pool.
     * The default implementation throws {@link UnsupportedOperationException}.
     *
     * @return the predictor pool
     */
    default GenericObjectPool<Predictor<Image, DirectionInfo>> getPool() {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/direction/PPOCRMobileV2ClsModel.java
对比新文件
@@ -0,0 +1,393 @@
package com.xindao.ocr.smartjavaai.model.common.direction;
import ai.djl.MalformedModelException;
import ai.djl.engine.Engine;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.ImageFactory;
import ai.djl.ndarray.NDManager;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ModelNotFoundException;
import ai.djl.repository.zoo.ModelZoo;
import ai.djl.repository.zoo.ZooModel;
import cn.smartjavaai.common.pool.PredictorFactory;
import cn.smartjavaai.common.utils.FileUtils;
import cn.smartjavaai.common.utils.ImageUtils;
import cn.smartjavaai.common.utils.OpenCVUtils;
import com.xindao.ocr.smartjavaai.config.DirectionModelConfig;
import com.xindao.ocr.smartjavaai.entity.DirectionInfo;
import com.xindao.ocr.smartjavaai.entity.OcrBox;
import com.xindao.ocr.smartjavaai.entity.OcrItem;
import com.xindao.ocr.smartjavaai.enums.AngleEnum;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import com.xindao.ocr.smartjavaai.model.common.direction.criteria.DirectionCriteriaFactory;
import com.xindao.ocr.smartjavaai.utils.OcrUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.opencv.core.Mat;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
/**
 * PPOCRMobileV2Model direction classification model
 * @author dwj
 * @date 2025/4/21
 */
@Slf4j
public class PPOCRMobileV2ClsModel implements OcrDirectionModel {
    private GenericObjectPool<Predictor<Image, DirectionInfo>> predictorPool;
    private DirectionModelConfig config;
    private ZooModel<Image, DirectionInfo> model;
    private OcrCommonDetModel textDetModel;
    @Override
    public void loadModel(DirectionModelConfig config){
        if(StringUtils.isBlank(config.getModelPath())){
            throw new OcrException("modelPath is null");
        }
        this.config = config;
        this.textDetModel = config.getTextDetModel();
        ConcurrentHashMap params = new ConcurrentHashMap<String, String>();
        if(StringUtils.isNotBlank(config.getBatchifier())){
            params.put("batchifier", config.getBatchifier());
        }
        Criteria<Image, DirectionInfo> criteria = DirectionCriteriaFactory.createCriteria(config);
        try{
            model = ModelZoo.loadModel(criteria);
            // åˆ›å»ºæ± å­ï¼šæ¯ä¸ªçº¿ç¨‹ç‹¬äº« Predictor
            this.predictorPool = new GenericObjectPool<>(new PredictorFactory<>(model));
            int predictorPoolSize = config.getPredictorPoolSize();
            if(config.getPredictorPoolSize() <= 0){
                predictorPoolSize = Runtime.getRuntime().availableProcessors(); // é»˜è®¤ç­‰äºŽCPU核心数
            }
            predictorPool.setMaxTotal(predictorPoolSize);
            log.debug("当前设备: " + model.getNDManager().getDevice());
            log.debug("当前引擎: " + Engine.getInstance().getEngineName());
            log.debug("模型推理器线程池最大数量: " + predictorPoolSize);
        } catch (IOException | ModelNotFoundException | MalformedModelException e) {
            throw new OcrException("模型加载失败", e);
        }
    }
    @Override
    public List<OcrItem> detect(String imagePath){
        if(!FileUtils.isFileExists(imagePath)){
            throw new OcrException("图像文件不存在");
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            return detect(img);
        } catch (IOException e) {
            throw new OcrException("无效的图片", e);
        }finally {
            if(img != null){
                ((Mat)img.getWrappedImage()).release();
            }
        }
    }
    @Override
    public List<OcrItem> detect(Image image){
        if(Objects.isNull(textDetModel)){
            throw new OcrException("textDetModel is null");
        }
        //检测文本
        List<OcrBox> boxeList = textDetModel.detect(image);
        if(Objects.isNull(boxeList) || boxeList.isEmpty()){
            throw new OcrException("未检测到文本");
        }
        Mat srcMat = (Mat) image.getWrappedImage();
        return detect(boxeList, srcMat);
    }
//    /**
//     * åŸºäºŽæ–‡æœ¬æ¡†æ£€æµ‹æ–¹å‘
//     * @param box
//     * @param srcMat
//     * @param predictor
//     * @param manager
//     * @return
//     */
//    private OcrItem detect(OcrBox box, Mat srcMat, Predictor<Image, DirectionInfo> predictor, NDManager manager){
//        if(Objects.isNull(box)){
//            throw new OcrException("box参数为空");
//        }
//        try {
//            //透视变换及裁剪
//            Image subImg = OcrUtils.transformAndCrop(srcMat, box);
//            DirectionInfo directionInfo = null;
//            String angle;
//            //高宽比 > 1.5 çºµå‘
//            if (subImg.getHeight() * 1.0 / subImg.getWidth() > 1.5) {
//                //旋转图片90度
//                subImg = OcrUtils.rotateImg(manager, subImg);
//                //检测方向
//                directionInfo = predictor.predict(subImg);
//                if (directionInfo.getName().equalsIgnoreCase("Rotate")) {
//                    angle = "270";
//                } else {
//                    angle = "90";
//                }
//            }else{ //横向
//                directionInfo = predictor.predict(subImg);
//                if (directionInfo.getName().equalsIgnoreCase("No Rotate")) {
//                    angle = "0";
//                } else {
//                    angle = "180";
//                }
//            }
//            ((Mat)subImg.getWrappedImage()).release();
//            return new OcrItem(box, AngleEnum.fromValue(angle), directionInfo.getProb().floatValue());
//        } catch (Exception e) {
//            throw new OcrException("OCR检测错误", e);
//        }
//    }
    @Override
    public List<OcrItem> detect(List<OcrBox> boxList,Mat srcMat){
        if(Objects.isNull(boxList) || boxList.isEmpty()){
            throw new OcrException("boxList为空");
        }
        List<List<OcrItem>> ocrItemList = batchDetect(Collections.singletonList(boxList), Collections.singletonList(srcMat));
        if(Objects.isNull(ocrItemList) || ocrItemList.isEmpty()){
            throw new OcrException("方向检测失败");
        }
        return ocrItemList.get(0);
    }
    @Override
    public void detectAndDraw(String imagePath, String outputPath) {
        if(!FileUtils.isFileExists(imagePath)){
            throw new OcrException("图像文件不存在");
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            List<OcrItem> itemList = detect(img);
            if(Objects.isNull(itemList) || itemList.isEmpty()){
                throw new OcrException("未检测到文字");
            }
            OcrUtils.drawRectWithText((Mat) img.getWrappedImage(), itemList);
            Path output = Paths.get(outputPath);
            log.debug("Saving to {}", output.toAbsolutePath().toString());
            img.save(Files.newOutputStream(output), "png");
        } catch (IOException e) {
            throw new OcrException(e);
        } finally {
            if (img != null){
                ((Mat)img.getWrappedImage()).release();
            }
        }
    }
    @Override
    public List<OcrItem> detect(BufferedImage image) {
        if(!ImageUtils.isImageValid(image)){
            throw new OcrException("图像无效");
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(image));
        List<OcrItem> ocrItemList = detect(img);
        ((Mat)img.getWrappedImage()).release();
        return ocrItemList;
    }
    @Override
    public List<OcrItem> detect(byte[] imageData) {
        if(Objects.isNull(imageData)){
            throw new OcrException("图像无效");
        }
        try {
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageData));
            return detect(image);
        } catch (IOException e) {
            throw new OcrException("错误的图像", e);
        }
    }
    @Override
    public BufferedImage detectAndDraw(BufferedImage sourceImage) {
        if(!ImageUtils.isImageValid(sourceImage)){
            throw new OcrException("图像无效");
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(sourceImage));
        List<OcrItem> ocrItemList = detect(img);
        if(Objects.isNull(ocrItemList) || ocrItemList.isEmpty()){
            throw new OcrException("未检测到文字");
        }
        OcrUtils.drawRectWithText((Mat) img.getWrappedImage(), ocrItemList);
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            // è°ƒç”¨ save æ–¹æ³•å°† Image å†™å…¥å­—节流
            img.save(outputStream, "png");
            // å°†å­—节流转换为 BufferedImage
            byte[] imageBytes = outputStream.toByteArray();
            return ImageIO.read(new ByteArrayInputStream(imageBytes));
        } catch (IOException e) {
            throw new OcrException("导出图片失败", e);
        } finally {
            if (img != null){
                ((Mat) img.getWrappedImage()).release();
            }
        }
    }
    @Override
    public List<List<OcrItem>> batchDetect(List<List<OcrBox>> boxList, List<Mat> srcMatList) {
        if(CollectionUtils.isEmpty(boxList)){
            throw new OcrException("boxList ä¸èƒ½ä¸ºç©º");
        }
        if(CollectionUtils.isEmpty(srcMatList)){
            throw new OcrException("srcMatList ä¸èƒ½ä¸ºç©º");
        }
        //检查参数
        for (int i = 0; i < srcMatList.size(); i++) {
            List<OcrBox> ocrBoxes = boxList.get(i);
            Mat mat = srcMatList.get(i);
            if (ocrBoxes == null) {
                throw new OcrException("第 " + i + " ä¸ª boxList ä¸º null");
            }
            if (ocrBoxes.isEmpty()) {
                throw new OcrException("第 " + i + " ä¸ª boxList æ²¡æœ‰æ£€æµ‹ç»“æžœ");
            }
            if (mat.empty()) {
                throw new OcrException("第 " + i + " å¼ å›¾ç‰‡ä¸ºç©º Mat");
            }
        }
        List<Image> imageList = new ArrayList<Image>();
        List<Boolean> isRotatedList = new ArrayList<Boolean>();
        int index = 0;
        try (NDManager manager = model.getNDManager().newSubManager()){
            for(int i = 0; i < srcMatList.size(); i++){
                for (int j = 0; j < boxList.get(i).size(); j++){
                    //透视变换及裁剪
                    Image subImg = OcrUtils.transformAndCrop(srcMatList.get(i), boxList.get(i).get(j));
                    //高宽比 > 1.5 çºµå‘
                    if (subImg.getHeight() * 1.0 / subImg.getWidth() > 1.5) {
                        //旋转图片90度
                        subImg = OcrUtils.rotateImg(manager, subImg);
                        isRotatedList.add(true);
                        imageList.add(subImg);
                    }else{
                        isRotatedList.add(false);
                        imageList.add(subImg);
                    }
                    index++;
                }
            }
            List<List<OcrItem>> result = new ArrayList<>();
            List<DirectionInfo> directionInfos = batchDetect(imageList);
            if(CollectionUtils.isEmpty(directionInfos)){
                throw new OcrException("方向检测失败");
            }
            index = 0;
            for(int i = 0; i < srcMatList.size(); i++){
                List<OcrItem> ocrItemList = new ArrayList<>();
                for (int j = 0; j < boxList.get(i).size(); j++){
                    DirectionInfo directionInfo = directionInfos.get(index);
                    if(Objects.isNull(directionInfo)){
                        throw new OcrException("方向检测失败: ç¬¬" + i + "张图片, ç¬¬" + j + "个文本块,未检测到方向");
                    }
                    String angle;
                    if(isRotatedList.get(index)){
                        if (directionInfo.getName().equalsIgnoreCase("Rotate")) {
                            angle = "270";
                        } else {
                            angle = "90";
                        }
                    }else{
                        if (directionInfo.getName().equalsIgnoreCase("No Rotate")) {
                            angle = "0";
                        } else {
                            angle = "180";
                        }
                    }
                    OcrItem ocrItem = new OcrItem(boxList.get(i).get(j), AngleEnum.fromValue(angle), directionInfo.getProb().floatValue());
                    ocrItemList.add(ocrItem);
                    index++;
                }
                result.add(ocrItemList);
            }
            return result;
        }
    }
    private List<DirectionInfo> batchDetect(List<Image> imageList) {
        Predictor<Image, DirectionInfo> predictor = null;
        try {
            predictor = predictorPool.borrowObject();
            return predictor.batchPredict(imageList);
        } catch (Exception e) {
            throw new OcrException("OCR检测错误", e);
        }finally {
            if (predictor != null) {
                try {
                    predictorPool.returnObject(predictor); //归还
                } catch (Exception e) {
                    log.warn("归还Predictor失败", e);
                    try {
                        predictor.close(); // å½’还失败才销毁
                    } catch (Exception ex) {
                        log.error("关闭Predictor失败", ex);
                    }
                }
            }
        }
    }
    @Override
    public void setTextDetModel(OcrCommonDetModel detModel) {
        this.textDetModel = detModel;
    }
    @Override
    public OcrCommonDetModel getTextDetModel() {
        return textDetModel;
    }
    @Override
    public GenericObjectPool<Predictor<Image, DirectionInfo>> getPool() {
        return predictorPool;
    }
    @Override
    public void close() throws Exception {
        try {
            if (predictorPool != null) {
                predictorPool.close();
            }
        } catch (Exception e) {
            log.warn("关闭 predictorPool å¤±è´¥", e);
        }
        try {
            if (model != null) {
                model.close();
            }
        } catch (Exception e) {
            log.warn("关闭 model å¤±è´¥", e);
        }
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/direction/criteria/DirectionCriteriaFactory.java
对比新文件
@@ -0,0 +1,57 @@
package com.xindao.ocr.smartjavaai.model.common.direction.criteria;
import ai.djl.Device;
import ai.djl.modality.cv.Image;
import ai.djl.repository.zoo.Criteria;
import ai.djl.training.util.ProgressBar;
import cn.smartjavaai.common.enums.DeviceEnum;
import com.xindao.ocr.smartjavaai.config.DirectionModelConfig;
import com.xindao.ocr.smartjavaai.entity.DirectionInfo;
import com.xindao.ocr.smartjavaai.enums.DirectionModelEnum;
import com.xindao.ocr.smartjavaai.model.common.direction.translator.PpWordRotateTranslator;
import org.apache.commons.lang3.StringUtils;
import java.nio.file.Paths;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
/**
 * è¡Œæ–¹å‘分类
 * @author dwj
 */
public class DirectionCriteriaFactory {
    public static Criteria<Image, DirectionInfo> createCriteria(DirectionModelConfig config) {
        Device device = null;
        if(!Objects.isNull(config.getDevice())){
            device = config.getDevice() == DeviceEnum.CPU ? Device.cpu() : Device.gpu(config.getGpuId());
        }
        Criteria<Image, DirectionInfo> criteria = null;
        ConcurrentHashMap params = new ConcurrentHashMap<String, String>();
        params.putAll(config.getCustomParams());
        if(StringUtils.isNotBlank(config.getBatchifier())){
            params.put("batchifier", config.getBatchifier());
        }
        if(config.getModelEnum() == DirectionModelEnum.CH_PPOCR_MOBILE_V2_CLS){
            params.put("resizeWidth", 192);
            params.put("resizeHeight", 48);
        }else if (config.getModelEnum() == DirectionModelEnum.PP_LCNET_X0_25){
            params.put("resizeWidth", 160);
            params.put("resizeHeight", 80);
        }else if (config.getModelEnum() == DirectionModelEnum.PP_LCNET_X1_0){
            params.put("resizeWidth", 160);
            params.put("resizeHeight", 80);
        }
        criteria =
                Criteria.builder()
                        .optEngine("OnnxRuntime")
                        .setTypes(Image.class, DirectionInfo.class)
                        .optModelPath(Paths.get(config.getModelPath()))
                        .optDevice(device)
                        .optTranslator(new PpWordRotateTranslator(params))
                        .optProgress(new ProgressBar())
                        .build();
        return criteria;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/direction/translator/PpWordRotateTranslator.java
对比新文件
@@ -0,0 +1,105 @@
package com.xindao.ocr.smartjavaai.model.common.direction.translator;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.util.NDImageUtils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.index.NDIndex;
import ai.djl.ndarray.types.Shape;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import com.xindao.ocr.smartjavaai.entity.DirectionInfo;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
 * æ–¹å‘检测
 *
 * @author Calvin
 * @mail 179209347@qq.com
 * @website www.aias.top
 */
public class PpWordRotateTranslator implements Translator<Image, DirectionInfo> {
    List<String> classes = Arrays.asList("No Rotate", "Rotate");
    private String batchifier;
    private int resizeHeight;
    private int resizeWidth;
    public PpWordRotateTranslator(Map<String, ?> arguments) {
        batchifier =  arguments.containsKey("batchifier")
                ? arguments.get("batchifier").toString()
                : "padding";
        resizeWidth =  arguments.containsKey("resizeWidth")
                ? (Integer) arguments.get("resizeWidth")
                : 192;
        resizeHeight =  arguments.containsKey("resizeHeight")
                ? (Integer) arguments.get("resizeHeight")
                : 48;
    }
    @Override
    public DirectionInfo processOutput(TranslatorContext ctx, NDList list) {
        NDArray prob = list.singletonOrThrow();
        float[] res = prob.toFloatArray();
        int maxIndex = 0;
        if (res[1] > res[0]) {
            maxIndex = 1;
        }
        return new DirectionInfo(classes.get(maxIndex), Double.valueOf(res[maxIndex]));
    }
//    public NDList processInput2(TranslatorContext ctx, Image input){
//        NDArray img = input.toNDArray(ctx.getNDManager());
//        img = NDImageUtils.resize(img, 192, 48);
//        img = NDImageUtils.toTensor(img).sub(0.5F).div(0.5F);
//        img = img.expandDims(0);
//        return new NDList(new NDArray[]{img});
//    }
    @Override
    public NDList processInput(TranslatorContext ctx, Image input) {
        NDArray img = input.toNDArray(ctx.getNDManager());
        int imgC = 3;
        int imgH = resizeHeight;
        int imgW = resizeWidth;
        NDArray array = ctx.getNDManager().zeros(new Shape(imgC, imgH, imgW));
        int h = input.getHeight();
        int w = input.getWidth();
        int resized_w = 0;
        float ratio = (float) w / (float) h;
        if (Math.ceil(imgH * ratio) > imgW) {
            resized_w = imgW;
        } else {
            resized_w = (int) (Math.ceil(imgH * ratio));
        }
        img = NDImageUtils.resize(img, resized_w, imgH);
        img = NDImageUtils.toTensor(img).sub(0.5F).div(0.5F);
        //    img = img.transpose(2, 0, 1);
        array.set(new NDIndex(":,:,0:" + resized_w), img);
//        array = array.expandDims(0);
        return new NDList(new NDArray[]{array});
    }
    @Override
    public Batchifier getBatchifier() {
        return Batchifier.fromString(batchifier);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/recognize/OcrCommonRecModel.java
对比新文件
@@ -0,0 +1,129 @@
package com.xindao.ocr.smartjavaai.model.common.recognize;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import com.xindao.ocr.smartjavaai.config.OcrRecModelConfig;
import com.xindao.ocr.smartjavaai.config.OcrRecOptions;
import com.xindao.ocr.smartjavaai.entity.OcrInfo;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import com.xindao.ocr.smartjavaai.model.common.direction.OcrDirectionModel;
import org.apache.commons.pool2.impl.GenericObjectPool;
import java.awt.image.BufferedImage;
import java.util.List;
/**
 * General-purpose OCR recognition model.
 *
 * <p>Apart from {@link #loadModel(OcrRecModelConfig)}, every method has a default
 * implementation that throws {@link UnsupportedOperationException}; implementations
 * override the operations they support.
 *
 * @author dwj
 */
public interface OcrCommonRecModel extends AutoCloseable{
    /**
     * Sets the text detection model.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param detModel text detection model
     */
    default void setTextDetModel(OcrCommonDetModel detModel){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the text detection model.
     * Default: throws {@link UnsupportedOperationException}.
     */
    default OcrCommonDetModel getTextDetModel(){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Sets the text direction model.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param directionModel direction model
     */
    default void setDirectionModel(OcrDirectionModel directionModel){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the text direction model.
     * Default: throws {@link UnsupportedOperationException}.
     */
    default OcrDirectionModel getDirectionModel(){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Loads the model.
     *
     * @param config recognition model configuration
     */
    void loadModel(OcrRecModelConfig config); // load the model
    /**
     * Text recognition from an image file.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param imagePath image path
     * @param options   recognition options
     * @return recognition result
     */
    default OcrInfo recognize(String imagePath, OcrRecOptions options) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Text recognition from a DJL image.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param image   DJL image
     * @param options recognition options
     * @return recognition result
     */
    default OcrInfo recognize(Image image, OcrRecOptions options) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Text recognition from a {@link BufferedImage}.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param image   BufferedImage
     * @param options recognition options
     * @return recognition result
     */
    default OcrInfo recognize(BufferedImage image, OcrRecOptions options) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Text recognition from encoded image bytes.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param imageData image byte array
     * @param options   recognition options
     * @return recognition result
     */
    default OcrInfo recognize(byte[] imageData, OcrRecOptions options) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes an image file and draws the result to an output file.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param imagePath  input image path
     * @param outputPath output image path
     * @param fontSize   font size used when drawing the result
     * @param options    recognition options
     */
    default void recognizeAndDraw(String imagePath, String outputPath, int fontSize, OcrRecOptions options) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes an image and draws the result.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param sourceImage image to recognize
     * @param fontSize    font size used when drawing the result
     * @param options     recognition options
     * @return image with the result drawn
     */
    default BufferedImage recognizeAndDraw(BufferedImage sourceImage, int fontSize, OcrRecOptions options){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes an image and draws the result, returned as Base64.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param imageData image byte array
     * @param fontSize  font size used when drawing the result
     * @param options   recognition options
     * @return Base64 string of the drawn image
     */
    default String recognizeAndDrawToBase64(byte[] imageData, int fontSize, OcrRecOptions options){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes an image and draws the result.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param imageData image byte array
     * @param fontSize  font size used when drawing the result
     * @param options   recognition options
     * @return recognition result
     */
    default OcrInfo recognizeAndDraw(byte[] imageData, int fontSize, OcrRecOptions options){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Batch recognition of {@link BufferedImage}s.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param imageList images to recognize
     * @param options   recognition options
     * @return recognition results
     */
    default List<OcrInfo> batchRecognize(List<BufferedImage> imageList, OcrRecOptions options) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Batch recognition of DJL images.
     * Default: throws {@link UnsupportedOperationException}.
     *
     * @param imageList images to recognize
     * @param options   recognition options
     * @return recognition results
     */
    default List<OcrInfo> batchRecognizeDJLImage(List<Image> imageList, OcrRecOptions options) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the predictor pool.
     * Default: throws {@link UnsupportedOperationException}.
     */
    default GenericObjectPool<Predictor<Image, String>> getPool() {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/recognize/OcrCommonRecModelImpl.java
对比新文件
@@ -0,0 +1,502 @@
package com.xindao.ocr.smartjavaai.model.common.recognize;
import ai.djl.MalformedModelException;
import ai.djl.engine.Engine;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.ImageFactory;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDManager;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ModelNotFoundException;
import ai.djl.repository.zoo.ModelZoo;
import ai.djl.repository.zoo.ZooModel;
import cn.hutool.core.img.ImgUtil;
import cn.smartjavaai.common.pool.PredictorFactory;
import cn.smartjavaai.common.utils.FileUtils;
import cn.smartjavaai.common.utils.ImageUtils;
import cn.smartjavaai.common.utils.OpenCVUtils;
import com.xindao.ocr.smartjavaai.config.OcrRecModelConfig;
import com.xindao.ocr.smartjavaai.config.OcrRecOptions;
import com.xindao.ocr.smartjavaai.entity.*;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import com.xindao.ocr.smartjavaai.model.common.direction.OcrDirectionModel;
import com.xindao.ocr.smartjavaai.model.common.recognize.criteria.OcrCommonRecCriterialFactory;
import com.xindao.ocr.smartjavaai.utils.OcrUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.opencv.core.Mat;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
/**
 * PPOCRV5 recognition model
 *
 * @author dwj
 */
@Slf4j
public class OcrCommonRecModelImpl implements OcrCommonRecModel {
    private GenericObjectPool<Predictor<Image, String>> recPredictorPool;
    private OcrRecModelConfig config;
    private ZooModel<Image, String> recognitionModel;
    private OcrDirectionModel directionModel;
    private OcrCommonDetModel textDetModel;
    @Override
    public void loadModel(OcrRecModelConfig config) {
        if (StringUtils.isBlank(config.getRecModelPath())) {
            throw new OcrException("recModelPath is null");
        }
        this.config = config;
        this.directionModel = config.getDirectionModel();
        this.textDetModel = config.getTextDetModel();
        //初始化 è¯†åˆ«Criteria
        Criteria<Image, String> recCriteria = OcrCommonRecCriterialFactory.createCriteria(config);
        try {
            recognitionModel = ModelZoo.loadModel(recCriteria);
            this.recPredictorPool = new GenericObjectPool<>(new PredictorFactory<>(recognitionModel));
            int predictorPoolSize = config.getPredictorPoolSize();
            if (config.getPredictorPoolSize() <= 0) {
                predictorPoolSize = Runtime.getRuntime().availableProcessors(); // é»˜è®¤ç­‰äºŽCPU核心数
            }
            recPredictorPool.setMaxTotal(predictorPoolSize);
            log.debug("当前设备: " + recognitionModel.getNDManager().getDevice());
            log.debug("当前引擎: " + Engine.getInstance().getEngineName());
            log.debug("模型推理器线程池最大数量: " + predictorPoolSize);
        } catch (IOException | ModelNotFoundException | MalformedModelException e) {
            throw new OcrException("识别模型加载失败", e);
        }
    }
    @Override
    public OcrInfo recognize(String imagePath, OcrRecOptions options) {
        if (StringUtils.isBlank(config.getRecModelPath())) {
            throw new OcrException("recModelPath为空,无法识别");
        }
        if (!FileUtils.isFileExists(imagePath)) {
            throw new OcrException("图像文件不存在");
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            return recognize(img, options);
        } catch (IOException e) {
            throw new OcrException("无效的图片", e);
        } finally {
            if (img != null) {
                ((Mat) img.getWrappedImage()).release();
            }
        }
    }
    /**
     * @param image
     * @param options
     * @return
     */
    @Override
    public OcrInfo recognize(Image image, OcrRecOptions options) {
        List<OcrInfo> result = batchRecognizeDJLImage(Collections.singletonList(image), options);
        if (CollectionUtils.isEmpty(result)) {
            throw new OcrException("OCR识别结果为空");
        }
        return result.get(0);
    }
    /**
     * æ‰¹é‡çŸ«æ­£æ–‡æœ¬æ¡†
     *
     * @param boxList
     * @param srcMat
     * @param manager
     * @return
     */
    private List<Image> batchAlign(List<OcrBox> boxList, Mat srcMat, NDManager manager) {
        List<Image> imageList = new ArrayList<>(boxList.size());
        for (int i = 0; i < boxList.size(); i++) {
            //透视变换 + è£å‰ª
            Image subImg = OcrUtils.transformAndCrop(srcMat, boxList.get(i));
            //ImageUtils.saveImage(subImg, i + "crop.png", "build/output");
            //高宽比 > 1.5
            if (subImg.getHeight() * 1.0 / subImg.getWidth() > 1.5) {
                //旋转图片90度
                subImg = OcrUtils.rotateImg(manager, subImg);
                //ImageUtils.saveImage(subImg, i + "rotate.png", "build/output");
            }
            imageList.add(subImg);
        }
        return imageList;
    }
    /**
     * æ‰¹é‡çŸ«æ­£æ–‡æœ¬æ¡†
     *
     * @param itemList
     * @param srcMat
     * @param manager
     * @return
     */
    private List<Image> batchAlignWithDirection(List<OcrItem> itemList, Mat srcMat, NDManager manager) {
        List<Image> imageList = new ArrayList<>(itemList.size());
        for (OcrItem ocrItem : itemList) {
            //放射变换+裁剪
            Image subImage = OcrUtils.transformAndCrop(srcMat, ocrItem.getOcrBox());
            //ImageUtils.saveImage(subImage, UUID.randomUUID().toString() + "_aaa.png", "build/output");
            //纠正文本框
            subImage = OcrUtils.rotateImg(subImage, ocrItem.getAngle());
            imageList.add(subImage);
        }
        return imageList;
    }
//    private RotatedBox recognize(OcrBox box,Mat srcMat,Predictor<Image, String> recPredictor,NDManager manager){
//        try {
//            //透视变换 + è£å‰ª
//            Image subImg = OcrUtils.transformAndCrop(srcMat, box);
//            //ImageUtils.saveImage(subImg, i + "crop.png", "build/output");
//            //高宽比 > 1.5
//            if (subImg.getHeight() * 1.0 / subImg.getWidth() > 1.5) {
//                //旋转图片90度
//                subImg = OcrUtils.rotateImg(manager, subImg);
//                //ImageUtils.saveImage(subImg, i + "rotate.png", "build/output");
//            }
//            String name = recPredictor.predict(subImg);
//            ((Mat)subImg.getWrappedImage()).release();
//            NDArray pointsArray = manager.create(box.toFloatArray());
//            return new RotatedBox(pointsArray, name);
//        } catch (Exception e) {
//            throw new OcrException("OCR检测错误", e);
//        }
//    }
    /**
     * åŽå¤„理:排序,分行
     *
     * @param rotatedBoxes
     */
    private OcrInfo postProcessOcrResult(List<RotatedBox> rotatedBoxes, OcrRecOptions ocrRecOptions) {
        //不分行
        if (!ocrRecOptions.isEnableLineSplit()) {
            return OcrUtils.convertRotatedBoxesToOcrItems(rotatedBoxes);
        }
        //Y坐标升序排序
        List<RotatedBox> initList = new ArrayList<>();
        for (RotatedBox result : rotatedBoxes) {
            initList.add(result);
        }
        Collections.sort(initList);
        //多行文本框的集合
        List<ArrayList<RotatedBoxCompX>> lines = new ArrayList<>();
        List<RotatedBoxCompX> line = new ArrayList<>();
        RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText());
        line.add(firstBox);
        lines.add((ArrayList) line);
        //分行判断
        for (int i = 1; i < initList.size(); i++) {
            RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText());
            float y1 = firstBox.getBox().toFloatArray()[1];
            float y2 = tmpBox.getBox().toFloatArray()[1];
            float dis = Math.abs(y2 - y1);
            if (dis < 20) { // è®¤ä¸ºæ˜¯åŒ 1 è¡Œ  - Considered to be in the same line
                line.add(tmpBox);
            } else { // æ¢è¡Œ - Line break
                firstBox = tmpBox;
                Collections.sort(line);
                line = new ArrayList<>();
                line.add(firstBox);
                lines.add((ArrayList) line);
            }
        }
        return OcrUtils.convertToOcrInfo(lines);
    }
    @Override
    public void recognizeAndDraw(String imagePath, String outputPath, int fontSize, OcrRecOptions options) {
        if (!FileUtils.isFileExists(imagePath)) {
            throw new OcrException("图像文件不存在");
        }
        try {
            Image img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            OcrInfo ocrInfo = recognize(img, options);
            if (Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()) {
                throw new OcrException("未检测到文字");
            }
            Mat wrappedImage = (Mat) img.getWrappedImage();
            BufferedImage bufferedImage = OpenCVUtils.mat2Image(wrappedImage);
            OcrUtils.drawRectWithText(bufferedImage, ocrInfo, fontSize);
            ImageUtils.saveImage(bufferedImage, outputPath);
            wrappedImage.release();
        } catch (IOException e) {
            throw new OcrException(e);
        }
    }
    @Override
    public OcrInfo recognize(BufferedImage image, OcrRecOptions options) {
        if (!ImageUtils.isImageValid(image)) {
            throw new OcrException("图像无效");
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(image));
        OcrInfo ocrInfo = recognize(img, options);
        ((Mat) img.getWrappedImage()).release();
        return ocrInfo;
    }
    @Override
    public OcrInfo recognize(byte[] imageData, OcrRecOptions options) {
        if (Objects.isNull(imageData)) {
            throw new OcrException("图像无效");
        }
        try {
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageData));
            return recognize(image, options);
        } catch (IOException e) {
            throw new OcrException("错误的图像", e);
        }
    }
    @Override
    public BufferedImage recognizeAndDraw(BufferedImage sourceImage, int fontSize, OcrRecOptions options) {
        if (!ImageUtils.isImageValid(sourceImage)) {
            throw new OcrException("图像无效");
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(sourceImage));
        OcrInfo ocrInfo = recognize(img, options);
        if (Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()) {
            throw new OcrException("未检测到文字");
        }
        OcrUtils.drawRectWithText(sourceImage, ocrInfo, fontSize);
        return sourceImage;
    }
    @Override
    public String recognizeAndDrawToBase64(byte[] imageData, int fontSize, OcrRecOptions options) {
        if (Objects.isNull(imageData)) {
            throw new OcrException("图像无效");
        }
        OcrInfo ocrInfo = recognize(imageData, options);
        if (Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()) {
            throw new OcrException("未检测到文字");
        }
        try {
            BufferedImage sourceImage = ImageIO.read(new ByteArrayInputStream(imageData));
            OcrUtils.drawRectWithText(sourceImage, ocrInfo, fontSize);
            return ImgUtil.toBase64(sourceImage, "png");
        } catch (IOException e) {
            throw new OcrException("导出图片失败", e);
        }
    }
    @Override
    public OcrInfo recognizeAndDraw(byte[] imageData, int fontSize, OcrRecOptions options) {
        if (Objects.isNull(imageData)) {
            throw new OcrException("图像无效");
        }
        OcrInfo ocrInfo = recognize(imageData, options);
        if (Objects.isNull(ocrInfo) || Objects.isNull(ocrInfo.getLineList()) || ocrInfo.getLineList().isEmpty()) {
            throw new OcrException("未检测到文字");
        }
        try {
            BufferedImage sourceImage = ImageIO.read(new ByteArrayInputStream(imageData));
            OcrUtils.drawRectWithText(sourceImage, ocrInfo, fontSize);
            ocrInfo.setBase64Img(ImgUtil.toBase64(sourceImage, "png"));
            return ocrInfo;
        } catch (IOException e) {
            throw new OcrException("导出图片失败", e);
        }
    }
    @Override
    public List<OcrInfo> batchRecognize(List<BufferedImage> imageList, OcrRecOptions options) {
        List<Image> djlImageList = new ArrayList<>(imageList.size());
        try {
            for (BufferedImage bufferedImage : imageList) {
                djlImageList.add(ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(bufferedImage)));
            }
            return batchRecognizeDJLImage(djlImageList, options);
        } catch (Exception e) {
            throw new OcrException(e);
        } finally {
            djlImageList.forEach(image -> ((Mat) image.getWrappedImage()).release());
        }
    }
    @Override
    public List<OcrInfo> batchRecognizeDJLImage(List<Image> imageList, OcrRecOptions options) {
        if (Objects.isNull(textDetModel)) {
            throw new OcrException("textDetModel is null");
        }
        OcrRecOptions ocrRecOptions = options;
        if (Objects.isNull(options)) {
            ocrRecOptions = new OcrRecOptions();
        }
        if (CollectionUtils.isEmpty(imageList)) {
            throw new OcrException("imageList is empty");
        }
        //检测文本
        List<List<OcrBox>> boxeList = textDetModel.batchDetectDJLImage(imageList);
        if (CollectionUtils.isEmpty(boxeList) || boxeList.size() != imageList.size()) {
            throw new OcrException("未检测到文本");
        }
        Predictor<Image, String> predictor = null;
        List<OcrInfo> ocrInfoList = new ArrayList<OcrInfo>();
        try (NDManager manager = NDManager.newBaseManager()) {
            predictor = recPredictorPool.borrowObject();
            List<Image> allImageAlignList = new ArrayList<Image>();
            //检测方向
            if (ocrRecOptions.isEnableDirectionCorrect()) {
                if (Objects.isNull(directionModel)) {
                    throw new OcrException("请配置方向模型");
                }
                List<Mat> matList = imageList.stream()
                        .map(image -> (Mat) image.getWrappedImage())
                        .collect(Collectors.toList());
                List<List<OcrItem>> ocrItemList = directionModel.batchDetect(boxeList, matList);
                if (CollectionUtils.isEmpty(ocrItemList) || ocrItemList.size() != imageList.size()) {
                    throw new OcrException("方向检测失败");
                }
                allImageAlignList = new ArrayList<Image>();
                for (int i = 0; i < ocrItemList.size(); i++) {
                    Mat srcMat = (Mat) imageList.get(i).getWrappedImage();
                    List<Image> imageAlignList = batchAlignWithDirection(ocrItemList.get(i), srcMat, manager);
//                    for(int j = 0; j < imageAlignList.size(); j++){
//                        ImageUtils.saveImage(imageAlignList.get(j),"dir-"+i+"-"+j+".png","/Users/xxx/Downloads/testing33");
//                    }
                    allImageAlignList.addAll(imageAlignList);
                }
            } else {
                for (int i = 0; i < boxeList.size(); i++) {
                    Mat srcMat = (Mat) imageList.get(i).getWrappedImage();
                    List<Image> imageAlignList = batchAlign(boxeList.get(i), srcMat, manager);
//                    for(int j = 0; j < imageAlignList.size(); j++){
//                        ImageUtils.saveImage(imageAlignList.get(j),i+"-"+j+".png","/Users/xxx/Downloads/testing33");
//                    }
                    allImageAlignList.addAll(imageAlignList);
                }
            }
            List<String> textList = batchRecognize(allImageAlignList);
            int textIndex = 0;
            for (int i = 0; i < boxeList.size(); i++) {
                List<RotatedBox> rotatedBoxes = new ArrayList<>();
                for (int j = 0; j < boxeList.get(i).size(); j++) {
                    if (textIndex >= textList.size()) {
                        throw new OcrException("识别失败: ç¬¬" + i + "张图片, ç¬¬" + j + "个文本块,未识别到文本");
                    }
                    OcrBox box = boxeList.get(i).get(j);
                    NDArray pointsArray = manager.create(box.toFloatArray());
                    rotatedBoxes.add(new RotatedBox(pointsArray, textList.get(textIndex)));
                    textIndex++;
                }
                OcrInfo ocrInfo = postProcessOcrResult(rotatedBoxes, ocrRecOptions);
                ocrInfoList.add(ocrInfo);
            }
            return ocrInfoList;
        } catch (Exception e) {
            throw new OcrException("OCR检测错误", e);
        } finally {
            if (predictor != null) {
                try {
                    recPredictorPool.returnObject(predictor); //归还
                } catch (Exception e) {
                    log.warn("归还Predictor失败", e);
                    try {
                        predictor.close(); // å½’还失败才销毁
                    } catch (Exception ex) {
                        log.error("关闭Predictor失败", ex);
                    }
                }
            }
        }
    }
    private List<String> batchRecognize(List<Image> imageAlignList) {
        Predictor<Image, String> predictor = null;
        try {
            predictor = recPredictorPool.borrowObject();
            List<String> textList = predictor.batchPredict(imageAlignList);
            imageAlignList.forEach(subImg -> ((Mat) subImg.getWrappedImage()).release());
            return textList;
        } catch (Exception e) {
            throw new OcrException("OCR检测错误", e);
        } finally {
            if (predictor != null) {
                try {
                    recPredictorPool.returnObject(predictor); //归还
                } catch (Exception e) {
                    log.warn("归还Predictor失败", e);
                    try {
                        predictor.close(); // å½’还失败才销毁
                    } catch (Exception ex) {
                        log.error("关闭Predictor失败", ex);
                    }
                }
            }
        }
    }
    /**
     * Sets the text detection model used by the batch recognition pipeline.
     *
     * @param detModel text detection model
     */
    @Override
    public void setTextDetModel(OcrCommonDetModel detModel) {
        this.textDetModel = detModel;
    }
    /**
     * Returns the text detection model currently set, or {@code null} if none is set.
     */
    @Override
    public OcrCommonDetModel getTextDetModel() {
        return textDetModel;
    }
    /**
     * Sets the direction model used when direction correction is enabled.
     *
     * @param directionModel direction model
     */
    @Override
    public void setDirectionModel(OcrDirectionModel directionModel) {
        this.directionModel = directionModel;
    }
    /**
     * Returns the direction model currently set, or {@code null} if none is set.
     */
    @Override
    public OcrDirectionModel getDirectionModel() {
        return directionModel;
    }
    public GenericObjectPool<Predictor<Image, String>> getRecPredictorPool() {
        return recPredictorPool;
    }
    @Override
    public void close() throws Exception {
        try {
            if (recPredictorPool != null) {
                recPredictorPool.close();
            }
        } catch (Exception e) {
            log.warn("关闭 predictorPool å¤±è´¥", e);
        }
        try {
            if (recognitionModel != null) {
                recognitionModel.close();
            }
        } catch (Exception e) {
            log.warn("关闭 model å¤±è´¥", e);
        }
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/recognize/criteria/OcrCommonRecCriterialFactory.java
对比新文件
@@ -0,0 +1,51 @@
package com.xindao.ocr.smartjavaai.model.common.recognize.criteria;
import ai.djl.Device;
import ai.djl.modality.cv.Image;
import ai.djl.repository.zoo.Criteria;
import ai.djl.training.util.ProgressBar;
import cn.smartjavaai.common.enums.DeviceEnum;
import com.xindao.ocr.smartjavaai.config.OcrRecModelConfig;
import com.xindao.ocr.smartjavaai.enums.CommonRecModelEnum;
import com.xindao.ocr.smartjavaai.model.common.recognize.translator.PPOCRRecTranslator;
import org.apache.commons.lang3.StringUtils;
import java.nio.file.Paths;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
/**
 * @author dwj
 * @date 2025/7/8
 */
public class OcrCommonRecCriterialFactory {
    public static Criteria<Image, String> createCriteria(OcrRecModelConfig config) {
        Device device = null;
        if(!Objects.isNull(config.getDevice())){
            device = config.getDevice() == DeviceEnum.CPU ? Device.cpu() : Device.gpu(config.getGpuId());
        }
        Criteria<Image, String> criteria = null;
        ConcurrentHashMap params = new ConcurrentHashMap<String, String>();
        params.putAll(config.getCustomParams());
        if(StringUtils.isNotBlank(config.getBatchifier())){
            params.put("batchifier", config.getBatchifier());
        }
        if(config.getRecModelEnum() == CommonRecModelEnum.PP_OCR_V5_SERVER_REC_MODEL ||
                config.getRecModelEnum() == CommonRecModelEnum.PP_OCR_V5_MOBILE_REC_MODEL ||
                config.getRecModelEnum() == CommonRecModelEnum.PP_OCR_V4_SERVER_REC_MODEL ||
                config.getRecModelEnum() == CommonRecModelEnum.PP_OCR_V4_MOBILE_REC_MODEL ){
            criteria =
                    Criteria.builder()
                            .optEngine("OnnxRuntime")
                            .setTypes(Image.class, String.class)
                            .optModelPath(Paths.get(config.getRecModelPath()))
                            .optTranslator(new PPOCRRecTranslator(params))
                            .optProgress(new ProgressBar())
                            .optDevice(device)
                            .build();
        }
        return criteria;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/common/recognize/translator/PPOCRRecTranslator.java
对比新文件
@@ -0,0 +1,129 @@
package com.xindao.ocr.smartjavaai.model.common.recognize.translator;
import ai.djl.Model;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.util.NDImageUtils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.index.NDIndex;
import ai.djl.ndarray.types.DataType;
import ai.djl.ndarray.types.Shape;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import ai.djl.util.Utils;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
 * æ–‡å­—识别前后处理
 *
 */
public class PPOCRRecTranslator implements Translator<Image, String> {
    private List<String> table;
    private final boolean use_space_char;
    private String batchifier;
    public PPOCRRecTranslator(Map<String, ?> arguments) {
        use_space_char =
                arguments.containsKey("use_space_char")
                        ? Boolean.parseBoolean(arguments.get("use_space_char").toString())
                        : true;
        batchifier =  arguments.containsKey("batchifier")
                ? arguments.get("batchifier").toString()
                : "padding";
    }
    @Override
    public void prepare(TranslatorContext ctx) throws IOException {
        Model model = ctx.getModel();
        try (InputStream is = model.getArtifact("dict.txt").openStream()) {
            table = Utils.readLines(is, true);
            table.add(0, "blank");
            if(use_space_char){
                table.add(" ");
                table.add(" ");
            }
            else{
                table.add("");
                table.add("");
            }
        }
    }
    @Override
    public String processOutput(TranslatorContext ctx, NDList list) throws IOException {
        StringBuilder sb = new StringBuilder();
        NDArray tokens = list.singletonOrThrow();
//        long[] indices = tokens.get(0).argMax(1).toLongArray();
        long[] indices = tokens.argMax(1).toLongArray();
        boolean[] selection = new boolean[indices.length];
        Arrays.fill(selection, true);
        for (int i = 1; i < indices.length; i++) {
            if (indices[i] == indices[i - 1]) {
                selection[i] = false;
            }
        }
        // å­—符置信度
//        float[] probs = new float[indices.length];
//        for (int row = 0; row < indices.length; row++) {
//            NDArray value = tokens.get(0).get(new NDIndex(""+ row +":" + (row + 1) +"," + indices[row] +":" + ( indices[row] + 1)));
//            probs[row] = value.toFloatArray()[0];
//        }
        int lastIdx = 0;
        for (int i = 0; i < indices.length; i++) {
            if (selection[i] == true && indices[i] > 0 && !(i > 0 && indices[i] == lastIdx)) {
                sb.append(table.get((int) indices[i]));
            }
        }
        return sb.toString();
    }
    @Override
    public NDList processInput(TranslatorContext ctx, Image input) {
        NDArray img = input.toNDArray(ctx.getNDManager(), Image.Flag.COLOR);
        int imgC = 3;
        int imgH = 48;
        int imgW = 320;
        float max_wh_ratio = (float) imgW / (float) imgH;
        int h = input.getHeight();
        int w = input.getWidth();
        float wh_ratio = (float) w / (float) h;
        max_wh_ratio = Math.max(max_wh_ratio,wh_ratio);
        imgW = (int)(imgH * max_wh_ratio);
        int resized_w;
        if (Math.ceil(imgH * wh_ratio) > imgW) {
            resized_w = imgW;
        } else {
            resized_w = (int) (Math.ceil(imgH * wh_ratio));
        }
        NDArray resized_image = NDImageUtils.resize(img, resized_w, imgH);
        resized_image = resized_image.transpose(2, 0, 1).toType(DataType.FLOAT32,false);
        resized_image.divi(255f).subi(0.5f).divi(0.5f);
        NDArray padding_im = ctx.getNDManager().zeros(new Shape(imgC, imgH, imgW), DataType.FLOAT32);
        padding_im.set(new NDIndex(":,:,0:" + resized_w), resized_image);
        padding_im = padding_im.flip(0);
//        padding_im = padding_im.expandDims(0);
        return new NDList(padding_im);
    }
    @Override
    public Batchifier getBatchifier() {
        return Batchifier.fromString(batchifier);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/CRNNPlateRecModel.java
对比新文件
@@ -0,0 +1,325 @@
package com.xindao.ocr.smartjavaai.model.plate;
import ai.djl.MalformedModelException;
import ai.djl.engine.Engine;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.ImageFactory;
import ai.djl.modality.cv.output.DetectedObjects;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ModelNotFoundException;
import ai.djl.repository.zoo.ModelZoo;
import ai.djl.repository.zoo.ZooModel;
import cn.smartjavaai.common.entity.DetectionRectangle;
import cn.smartjavaai.common.entity.R;
import cn.smartjavaai.common.pool.PredictorFactory;
import cn.smartjavaai.common.utils.Base64ImageUtils;
import cn.smartjavaai.common.utils.FileUtils;
import cn.smartjavaai.common.utils.ImageUtils;
import cn.smartjavaai.common.utils.OpenCVUtils;
import com.xindao.ocr.smartjavaai.config.PlateRecModelConfig;
import com.xindao.ocr.smartjavaai.entity.PlateInfo;
import com.xindao.ocr.smartjavaai.entity.PlateResult;
import com.xindao.ocr.smartjavaai.enums.PlateType;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.plate.criteria.PlateRecCriterialFactory;
import com.xindao.ocr.smartjavaai.utils.OcrUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.Rect;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
 * @author dwj
 */
@Slf4j
public class CRNNPlateRecModel implements PlateRecModel{
    private GenericObjectPool<Predictor<Image, PlateResult>> recPredictorPool;
    private ZooModel<Image, PlateResult> recModel;
    private PlateRecModelConfig config;
    @Override
    public void loadModel(PlateRecModelConfig config) {
        if(StringUtils.isBlank(config.getModelPath())){
            throw new OcrException("modelPath is null");
        }
        this.config = config;
        //初始化 æ£€æµ‹Criteria
        Criteria<Image, PlateResult> detCriteria = PlateRecCriterialFactory.createCriteria(config);
        try{
            recModel = ModelZoo.loadModel(detCriteria);
            // åˆ›å»ºæ± å­ï¼šæ¯ä¸ªçº¿ç¨‹ç‹¬äº« Predictor
            this.recPredictorPool = new GenericObjectPool<>(new PredictorFactory<>(recModel));
            int predictorPoolSize = config.getPredictorPoolSize();
            if(config.getPredictorPoolSize() <= 0){
                predictorPoolSize = Runtime.getRuntime().availableProcessors(); // é»˜è®¤ç­‰äºŽCPU核心数
            }
            recPredictorPool.setMaxTotal(predictorPoolSize);
            log.debug("当前设备: " + recModel.getNDManager().getDevice());
            log.debug("当前引擎: " + Engine.getInstance().getEngineName());
            log.debug("模型推理器线程池最大数量: " + predictorPoolSize);
        } catch (IOException | ModelNotFoundException | MalformedModelException e) {
            throw new OcrException("检测模型加载失败", e);
        }
    }
    @Override
    public R<List<PlateInfo>> recognize(String imagePath) {
        if(!FileUtils.isFileExists(imagePath)){
            return R.fail(R.Status.FILE_NOT_FOUND);
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            R<List<PlateInfo>> plateResult = recognize(img);
            return plateResult;
        } catch (IOException e) {
            throw new OcrException("无效的图片", e);
        } finally {
            ((Mat)img.getWrappedImage()).release();
        }
    }
    @Override
    public R<List<PlateInfo>> recognizeBase64(String base64Image) {
        if(StringUtils.isBlank(base64Image)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        byte[] imageData = Base64ImageUtils.base64ToImage(base64Image);
        return recognize(imageData);
    }
    @Override
    public R<List<PlateInfo>> recognize(BufferedImage image) {
        if(!ImageUtils.isImageValid(image)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(image));
        R<List<PlateInfo>> plateResult = recognize(img);
        ((Mat)img.getWrappedImage()).release();
        return plateResult;
    }
    @Override
    public R<List<PlateInfo>> recognize(byte[] imageData) {
        if(Objects.isNull(imageData)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        return recognize(new ByteArrayInputStream(imageData));
    }
    @Override
    public R<List<PlateInfo>> recognize(Image image) {
        if(Objects.isNull(config.getPlateDetModel())){
            return R.fail(R.Status.PARAM_ERROR.getCode(), "未指定车牌检测模型");
        }
        DetectedObjects detectedObjects = config.getPlateDetModel().detect(image);
        if(Objects.isNull(detectedObjects) || detectedObjects.getNumberOfObjects() == 0){
            return R.fail(R.Status.NO_OBJECT_DETECTED);
        }
        List<PlateInfo> plateInfoList = OcrUtils.convertToPlateInfo(detectedObjects, image);
        Predictor<Image, PlateResult> predictor = null;
        try {
            predictor = recPredictorPool.borrowObject();
            for (PlateInfo plateInfo : plateInfoList){
                DetectionRectangle detectionRectangle = plateInfo.getDetectionRectangle();
//                Image subImage = image.getSubImage(detectionRectangle.getX(), detectionRectangle.getY(), detectionRectangle.getWidth(), detectionRectangle.getHeight());
                //透视变换
                Image subImage = OcrUtils.transformAndCrop((Mat)image.getWrappedImage(), plateInfo.getBox());
                //双层车牌
                if(plateInfo.getPlateType() == PlateType.DOUBLE){
                    Mat mergeImage = getSplitMerge((Mat)subImage.getWrappedImage());
                    subImage = ImageFactory.getInstance().fromImage(mergeImage);
                }
                PlateResult plateResult = predictor.predict(subImage);
                if(Objects.nonNull(plateResult)){
                    plateInfo.setPlateNumber(plateResult.getPlateNo());
                    plateInfo.setPlateColor(plateResult.getPlateColor());
                }
            }
           return R.ok(plateInfoList);
        } catch (Exception e) {
            throw new OcrException("车牌识别错误", e);
        }finally {
            if (predictor != null) {
                try {
                    recPredictorPool.returnObject(predictor); //归还
                } catch (Exception e) {
                    log.warn("归还Predictor失败", e);
                    try {
                        predictor.close(); // å½’还失败才销毁
                    } catch (Exception ex) {
                        log.error("关闭Predictor失败", ex);
                    }
                }
            }
        }
    }
    /**
     * åŒå±‚车牌进行分割后识别
     * @param img
     * @return
     */
    private Mat getSplitMerge(Mat img) {
        int h = img.rows();
        int w = img.cols();
        // ä¸ŠåŠéƒ¨åˆ†ï¼šé«˜åº¦çš„前 5/12
        Rect upperRect = new Rect(0, 0, w, (int)(5.0 / 12 * h));
        Mat imgUpper = new Mat(img, upperRect);
        // ä¸‹åŠéƒ¨åˆ†ï¼šé«˜åº¦ä»Ž 1/3 å¼€å§‹
        Rect lowerRect = new Rect(0, (int)(1.0 / 3 * h), w, h - (int)(1.0 / 3 * h));
        Mat imgLower = new Mat(img, lowerRect);
        // å°†ä¸ŠåŠéƒ¨åˆ† resize åˆ°ä¸Žä¸‹åŠéƒ¨åˆ†ç›¸åŒå¤§å°
        Mat resizedUpper = new Mat();
        Size lowerSize = imgLower.size();
        Imgproc.resize(imgUpper, resizedUpper, lowerSize);
        // æ°´å¹³æ‹¼æŽ¥ï¼ˆå°†ä¸Šä¸‹æ‹¼æˆå·¦å³ï¼‰
        List<Mat> mergeList = new ArrayList<>();
        mergeList.add(resizedUpper);
        mergeList.add(imgLower);
        Mat merged = new Mat();
        Core.hconcat(mergeList, merged);
        return merged;
    }
    @Override
    public PlateResult recognizeCropped(Image image) {
        Predictor<Image, PlateResult> predictor = null;
        try {
            predictor = recPredictorPool.borrowObject();
            return predictor.predict(image);
        } catch (Exception e) {
            throw new OcrException("车牌检测错误", e);
        }finally {
            if (predictor != null) {
                try {
                    recPredictorPool.returnObject(predictor); //归还
                } catch (Exception e) {
                    log.warn("归还Predictor失败", e);
                    try {
                        predictor.close(); // å½’还失败才销毁
                    } catch (Exception ex) {
                        log.error("关闭Predictor失败", ex);
                    }
                }
            }
        }
    }
    @Override
    public R<List<PlateInfo>> recognize(InputStream inputStream) {
        if(Objects.isNull(inputStream)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromInputStream(inputStream);
            return recognize(img);
        } catch (IOException e) {
            throw new OcrException("无效图片输入流", e);
        } finally {
            if (img != null){
                ((Mat)img.getWrappedImage()).release();
            }
        }
    }
    @Override
    public R<Void> recognizeAndDraw(String imagePath, String outputPath) {
        if(!FileUtils.isFileExists(imagePath)){
            return R.fail(R.Status.FILE_NOT_FOUND);
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            R<List<PlateInfo>> plateResult = recognize(img);
            if(!plateResult.isSuccess()){
                return R.fail(plateResult.getCode(), plateResult.getMessage());
            }
            if(CollectionUtils.isEmpty(plateResult.getData())){
                return R.fail(R.Status.NO_OBJECT_DETECTED);
            }
            BufferedImage bufferedImage = OpenCVUtils.mat2Image((Mat)img.getWrappedImage());
            OcrUtils.drawPlateInfo(bufferedImage, plateResult.getData());
            ImageIO.write(bufferedImage, "png", new File(outputPath));
            return R.ok();
        } catch (IOException e) {
            throw new OcrException(e);
        } finally {
            if (img != null){
                ((Mat)img.getWrappedImage()).release();
            }
        }
    }
    @Override
    public R<BufferedImage> recognizeAndDraw(BufferedImage sourceImage) {
        if(!ImageUtils.isImageValid(sourceImage)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        try {
            R<List<PlateInfo>> plateResult = recognize(sourceImage);
            if(!plateResult.isSuccess()){
                return R.fail(plateResult.getCode(), plateResult.getMessage());
            }
            if(CollectionUtils.isEmpty(plateResult.getData())){
                return R.fail(R.Status.NO_OBJECT_DETECTED);
            }
            OcrUtils.drawPlateInfo(sourceImage, plateResult.getData());
            return R.ok(sourceImage);
        } catch (Exception e) {
            throw new OcrException("导出图片失败", e);
        }
    }
    @Override
    public GenericObjectPool<Predictor<Image, PlateResult>> getPool() {
        return recPredictorPool;
    }
    @Override
    public void close() throws Exception {
        try {
            if (recPredictorPool != null) {
                recPredictorPool.close();
            }
        } catch (Exception e) {
            log.warn("关闭 predictorPool å¤±è´¥", e);
        }
        try {
            if (recModel != null) {
                recModel.close();
            }
        } catch (Exception e) {
            log.warn("关闭 model å¤±è´¥", e);
        }
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/PlateDetModel.java
对比新文件
@@ -0,0 +1,112 @@
package com.xindao.ocr.smartjavaai.model.plate;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.output.DetectedObjects;
import cn.smartjavaai.common.entity.R;
import com.xindao.ocr.smartjavaai.config.PlateDetModelConfig;
import com.xindao.ocr.smartjavaai.entity.PlateInfo;
import org.apache.commons.pool2.impl.GenericObjectPool;
import java.awt.image.BufferedImage;
import java.io.InputStream;
import java.util.List;
/**
 * Licence plate detection model.
 *
 * <p>Only {@link #loadModel(PlateDetModelConfig)} is abstract. Every other operation has a
 * default body that throws {@link UnsupportedOperationException}; implementations are expected
 * to override the operations they support.
 *
 * @author dwj
 */
public interface PlateDetModel extends AutoCloseable{
    /**
     * Loads the model.
     *
     * @param config detection model configuration
     */
    void loadModel(PlateDetModelConfig config); // load the model
    /**
     * Detects licence plates in an image file.
     *
     * @param imagePath image path
     * @return detection result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> detect(String imagePath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects licence plates in an image read from a stream.
     *
     * @param inputStream image input stream
     * @return detection result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> detect(InputStream inputStream) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects licence plates in a Base64 encoded image.
     *
     * @param base64Image Base64 text of the image
     * @return detection result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> detectBase64(String base64Image){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects licence plates in a {@link BufferedImage}.
     *
     * @param image BufferedImage
     * @return detection result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> detect(BufferedImage image) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects licence plates in an image given as a byte array.
     *
     * @param imageData image bytes
     * @return detection result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> detect(byte[] imageData) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects licence plates in a DJL image.
     *
     * @param image DJL Image
     * @return the raw detected objects
     * @throws UnsupportedOperationException always, unless overridden
     */
    default DetectedObjects detect(Image image){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects plates and draws the result.
     *
     * @param imagePath input image path (including file name)
     * @param outputPath output image path (including file name)
     * @return operation status
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<Void> detectAndDraw(String imagePath, String outputPath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects plates and draws the result.
     *
     * @param sourceImage image to analyse
     * @return the image with the detection result drawn
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<BufferedImage> detectAndDraw(BufferedImage sourceImage){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the predictor pool used by the implementation.
     *
     * @return the predictor pool
     * @throws UnsupportedOperationException always, unless overridden
     */
    default GenericObjectPool<Predictor<Image, DetectedObjects>> getPool(){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/PlateRecModel.java
对比新文件
@@ -0,0 +1,116 @@
package com.xindao.ocr.smartjavaai.model.plate;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import cn.smartjavaai.common.entity.R;
import com.xindao.ocr.smartjavaai.config.PlateRecModelConfig;
import com.xindao.ocr.smartjavaai.entity.PlateInfo;
import com.xindao.ocr.smartjavaai.entity.PlateResult;
import org.apache.commons.pool2.impl.GenericObjectPool;
import java.awt.image.BufferedImage;
import java.io.InputStream;
import java.util.List;
/**
 * Licence plate recognition model.
 *
 * <p>Only {@link #loadModel(PlateRecModelConfig)} is abstract. Every other operation has a
 * default body that throws {@link UnsupportedOperationException}; implementations are expected
 * to override the operations they support.
 *
 * @author dwj
 */
public interface PlateRecModel extends AutoCloseable{
    /**
     * Loads the model.
     *
     * @param config recognition model configuration
     */
    void loadModel(PlateRecModelConfig config); // load the model
    /**
     * Recognizes licence plates in an image file.
     *
     * @param imagePath image path
     * @return recognition result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> recognize(String imagePath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes licence plates in an image read from a stream.
     *
     * @param inputStream image input stream
     * @return recognition result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> recognize(InputStream inputStream) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes licence plates in a Base64 encoded image.
     *
     * @param base64Image Base64 text of the image
     * @return recognition result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> recognizeBase64(String base64Image){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes licence plates in a {@link BufferedImage}.
     *
     * @param image BufferedImage
     * @return recognition result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> recognize(BufferedImage image) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes licence plates in an image given as a byte array.
     *
     * @param imageData image bytes
     * @return recognition result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> recognize(byte[] imageData) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes licence plates in a DJL image.
     *
     * @param image DJL Image
     * @return recognition result
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<List<PlateInfo>> recognize(Image image){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Recognizes an already cropped image.
     *
     * @param image cropped image
     * @return recognition result for that image
     * @throws UnsupportedOperationException always, unless overridden
     */
    default PlateResult recognizeCropped(Image image){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects plates and draws the result.
     *
     * @param imagePath input image path (including file name)
     * @param outputPath output image path (including file name)
     * @return operation status
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<Void> recognizeAndDraw(String imagePath, String outputPath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects plates and draws the result.
     *
     * @param sourceImage image to analyse
     * @return the image with the result drawn
     * @throws UnsupportedOperationException always, unless overridden
     */
    default R<BufferedImage> recognizeAndDraw(BufferedImage sourceImage){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the predictor pool used by the implementation.
     *
     * @return the predictor pool
     * @throws UnsupportedOperationException always, unless overridden
     */
    default GenericObjectPool<Predictor<Image, PlateResult>> getPool() {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/Yolov5PlateDetModel.java
对比新文件
@@ -0,0 +1,238 @@
package com.xindao.ocr.smartjavaai.model.plate;
import ai.djl.MalformedModelException;
import ai.djl.engine.Engine;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.ImageFactory;
import ai.djl.modality.cv.output.DetectedObjects;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ModelNotFoundException;
import ai.djl.repository.zoo.ModelZoo;
import ai.djl.repository.zoo.ZooModel;
import cn.smartjavaai.common.entity.R;
import cn.smartjavaai.common.pool.PredictorFactory;
import cn.smartjavaai.common.utils.Base64ImageUtils;
import cn.smartjavaai.common.utils.FileUtils;
import cn.smartjavaai.common.utils.ImageUtils;
import cn.smartjavaai.common.utils.OpenCVUtils;
import com.xindao.ocr.smartjavaai.config.PlateDetModelConfig;
import com.xindao.ocr.smartjavaai.entity.PlateInfo;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.plate.criteria.PlateDetCriterialFactory;
import com.xindao.ocr.smartjavaai.utils.OcrUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.opencv.core.Mat;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
/**
 * Yolov5 è½¦ç‰Œæ£€æµ‹æ¨¡åž‹
 * @author dwj
 * @date 2025/7/23
 */
@Slf4j
public class Yolov5PlateDetModel implements PlateDetModel{
    private GenericObjectPool<Predictor<Image, DetectedObjects>> detPredictorPool;
    private ZooModel<Image, DetectedObjects> detectionModel;
    private PlateDetModelConfig config;
    @Override
    public void loadModel(PlateDetModelConfig config) {
        if(StringUtils.isBlank(config.getModelPath())){
            throw new OcrException("modelPath is null");
        }
        this.config = config;
        //初始化 æ£€æµ‹Criteria
        Criteria<Image, DetectedObjects> detCriteria = PlateDetCriterialFactory.createCriteria(config);
        try{
            detectionModel = ModelZoo.loadModel(detCriteria);
            // åˆ›å»ºæ± å­ï¼šæ¯ä¸ªçº¿ç¨‹ç‹¬äº« Predictor
            this.detPredictorPool = new GenericObjectPool<>(new PredictorFactory<>(detectionModel));
            int predictorPoolSize = config.getPredictorPoolSize();
            if(config.getPredictorPoolSize() <= 0){
                predictorPoolSize = Runtime.getRuntime().availableProcessors(); // é»˜è®¤ç­‰äºŽCPU核心数
            }
            detPredictorPool.setMaxTotal(predictorPoolSize);
            log.debug("当前设备: " + detectionModel.getNDManager().getDevice());
            log.debug("当前引擎: " + Engine.getInstance().getEngineName());
            log.debug("模型推理器线程池最大数量: " + predictorPoolSize);
        } catch (IOException | ModelNotFoundException | MalformedModelException e) {
            throw new OcrException("检测模型加载失败", e);
        }
    }
    @Override
    public R<List<PlateInfo>> detect(String imagePath) {
        if(!FileUtils.isFileExists(imagePath)){
            return R.fail(R.Status.FILE_NOT_FOUND);
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
        } catch (IOException e) {
            throw new OcrException("无效的图片", e);
        }
        DetectedObjects detectedObjects = detect(img);
        if (Objects.isNull(detectedObjects) || detectedObjects.getNumberOfObjects() == 0){
            return R.fail(R.Status.NO_OBJECT_DETECTED);
        }
        List<PlateInfo> plateInfoList = OcrUtils.convertToPlateInfo(detectedObjects, img);
        ((Mat)img.getWrappedImage()).release();
        return R.ok(plateInfoList);
    }
    @Override
    public R<List<PlateInfo>> detectBase64(String base64Image) {
        if(StringUtils.isBlank(base64Image)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        byte[] imageData = Base64ImageUtils.base64ToImage(base64Image);
        return detect(imageData);
    }
    @Override
    public R<List<PlateInfo>> detect(BufferedImage image) {
        if(!ImageUtils.isImageValid(image)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(image));
        DetectedObjects detectedObjects = detect(img);
        if (Objects.isNull(detectedObjects) || detectedObjects.getNumberOfObjects() == 0){
            return R.fail(R.Status.NO_OBJECT_DETECTED);
        }
        List<PlateInfo> plateInfoList = OcrUtils.convertToPlateInfo(detectedObjects, img);
        ((Mat)img.getWrappedImage()).release();
        return R.ok(plateInfoList);
    }
    @Override
    public R<List<PlateInfo>> detect(byte[] imageData) {
        if(Objects.isNull(imageData)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        return detect(new ByteArrayInputStream(imageData));
    }
    @Override
    public DetectedObjects detect(Image image) {
        Predictor<Image, DetectedObjects> predictor = null;
        try {
            predictor = detPredictorPool.borrowObject();
            return predictor.predict(image);
        } catch (Exception e) {
            throw new OcrException("车牌检测错误", e);
        }finally {
            if (predictor != null) {
                try {
                    detPredictorPool.returnObject(predictor); //归还
                } catch (Exception e) {
                    log.warn("归还Predictor失败", e);
                    try {
                        predictor.close(); // å½’还失败才销毁
                    } catch (Exception ex) {
                        log.error("关闭Predictor失败", ex);
                    }
                }
            }
        }
    }
    @Override
    public R<List<PlateInfo>> detect(InputStream inputStream) {
        if(Objects.isNull(inputStream)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        try {
            Image img = ImageFactory.getInstance().fromInputStream(inputStream);
            DetectedObjects detection = detect(img);
            List<PlateInfo> plateInfoList = OcrUtils.convertToPlateInfo(detection, img);
            ((Mat)img.getWrappedImage()).release();
            return R.ok(plateInfoList);
        } catch (IOException e) {
            throw new OcrException("无效图片输入流", e);
        }
    }
    @Override
    public R<Void> detectAndDraw(String imagePath, String outputPath) {
        if(!FileUtils.isFileExists(imagePath)){
            return R.fail(R.Status.FILE_NOT_FOUND);
        }
        try {
            Image img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            DetectedObjects detectedObjects = detect(img);
            if(Objects.isNull(detectedObjects) || detectedObjects.getNumberOfObjects() == 0){
                return R.fail(R.Status.NO_FACE_DETECTED);
            }
            img.drawBoundingBoxes(detectedObjects);
            Path output = Paths.get(outputPath);
            log.debug("Saving to {}", output.toAbsolutePath().toString());
            img.save(Files.newOutputStream(output), "png");
            return R.ok();
        } catch (IOException e) {
            throw new OcrException(e);
        }
    }
    @Override
    public R<BufferedImage> detectAndDraw(BufferedImage sourceImage) {
        if(!ImageUtils.isImageValid(sourceImage)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        Image img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(sourceImage));
        DetectedObjects detectedObjects = detect(img);
        if(Objects.isNull(detectedObjects) || detectedObjects.getNumberOfObjects() == 0){
            return R.fail(R.Status.NO_FACE_DETECTED);
        }
        img.drawBoundingBoxes(detectedObjects);
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            // è°ƒç”¨ save æ–¹æ³•å°† Image å†™å…¥å­—节流
            img.save(outputStream, "png");
            // å°†å­—节流转换为 BufferedImage
            byte[] imageBytes = outputStream.toByteArray();
            return R.ok(ImageIO.read(new ByteArrayInputStream(imageBytes)));
        } catch (IOException e) {
            throw new OcrException("导出图片失败", e);
        }
    }
    @Override
    public GenericObjectPool<Predictor<Image, DetectedObjects>> getPool() {
        return detPredictorPool;
    }
    @Override
    public void close() throws Exception {
        try {
            if (detPredictorPool != null) {
                detPredictorPool.close();
            }
        } catch (Exception e) {
            log.warn("关闭 predictorPool å¤±è´¥", e);
        }
        try {
            if (detectionModel != null) {
                detectionModel.close();
            }
        } catch (Exception e) {
            log.warn("关闭 model å¤±è´¥", e);
        }
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/criteria/PlateDetCriterialFactory.java
对比新文件
@@ -0,0 +1,71 @@
package com.xindao.ocr.smartjavaai.model.plate.criteria;
import ai.djl.Device;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.output.DetectedObjects;
import ai.djl.repository.zoo.Criteria;
import ai.djl.training.util.ProgressBar;
import cn.smartjavaai.common.enums.DeviceEnum;
import com.xindao.ocr.smartjavaai.config.PlateDetModelConfig;
import com.xindao.ocr.smartjavaai.enums.PlateDetModelEnum;
import com.xindao.ocr.smartjavaai.model.plate.translator.Yolo5PlateDetectTranslator;
import com.xindao.ocr.smartjavaai.model.plate.translator.Yolov7PlateDetectTranslator;
import org.apache.commons.lang3.StringUtils;
import java.nio.file.Paths;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
/**
 * @author dwj
 * @date 2025/7/8
 */
public class PlateDetCriterialFactory {
    public static Criteria<Image, DetectedObjects> createCriteria(PlateDetModelConfig config) {
        Device device = null;
        if(!Objects.isNull(config.getDevice())){
            device = config.getDevice() == DeviceEnum.CPU ? Device.cpu() : Device.gpu(config.getGpuId());
        }
        Criteria<Image, DetectedObjects> criteria = null;
        ConcurrentHashMap params = new ConcurrentHashMap<String, String>();
        params.putAll(config.getCustomParams());
        if(StringUtils.isNotBlank(config.getBatchifier())){
            params.put("batchifier", config.getBatchifier());
        }
        if(config.getModelEnum() == PlateDetModelEnum.YOLOV5){
            criteria =
                    Criteria.builder()
                            .optEngine("OnnxRuntime")
                            .setTypes(Image.class, DetectedObjects.class)
                            .optModelPath(Paths.get(config.getModelPath()))
                            .optTranslator(new Yolo5PlateDetectTranslator(params))
                            .optDevice(device)
                            .optProgress(new ProgressBar())
                            .build();
        }else if (config.getModelEnum() == PlateDetModelEnum.YOLOV7){
            criteria =
                    Criteria.builder()
                            .optEngine("OnnxRuntime")
                            .setTypes(Image.class, DetectedObjects.class)
                            .optModelPath(Paths.get(config.getModelPath()))
                            .optTranslator(new Yolov7PlateDetectTranslator(params))
                            .optDevice(device)
                            .optProgress(new ProgressBar())
                            .build();
        }
//        else if (config.getModelEnum() == PlateDetModelEnum.YOLOV8){
//            criteria =
//                    Criteria.builder()
//                            .optEngine("OnnxRuntime")
//                            .setTypes(Image.class, DetectedObjects.class)
//                            .optModelPath(Paths.get(config.getModelPath()))
//                            .optTranslator(new Yolov8PlateDetectTranslator(params))
//                            .optDevice(device)
//                            .optProgress(new ProgressBar())
//                            .build();
//        }
        return criteria;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/criteria/PlateRecCriterialFactory.java
对比新文件
@@ -0,0 +1,42 @@
package com.xindao.ocr.smartjavaai.model.plate.criteria;
import ai.djl.Device;
import ai.djl.modality.cv.Image;
import ai.djl.repository.zoo.Criteria;
import ai.djl.training.util.ProgressBar;
import cn.smartjavaai.common.enums.DeviceEnum;
import com.xindao.ocr.smartjavaai.config.PlateRecModelConfig;
import com.xindao.ocr.smartjavaai.entity.PlateResult;
import com.xindao.ocr.smartjavaai.enums.PlateRecModelEnum;
import com.xindao.ocr.smartjavaai.model.plate.translator.CRNNPlateRecTranslator;
import java.nio.file.Paths;
import java.util.Objects;
/**
 * @author dwj
 * @date 2025/7/8
 */
public class PlateRecCriterialFactory {
    public static Criteria<Image, PlateResult> createCriteria(PlateRecModelConfig config) {
        Device device = null;
        if(!Objects.isNull(config.getDevice())){
            device = config.getDevice() == DeviceEnum.CPU ? Device.cpu() : Device.gpu(config.getGpuId());
        }
        Criteria<Image, PlateResult> criteria = null;
        if(config.getModelEnum() == PlateRecModelEnum.PLATE_REC_CRNN){
            criteria =
                    Criteria.builder()
                            .optEngine("OnnxRuntime")
                            .setTypes(Image.class, PlateResult.class)
                            .optModelPath(Paths.get(config.getModelPath()))
                            .optTranslator(new CRNNPlateRecTranslator())
                            .optDevice(device)
                            .optProgress(new ProgressBar())
                            .build();
        }
        return criteria;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/translator/CRNNPlateRecTranslator.java
对比新文件
@@ -0,0 +1,88 @@
package com.xindao.ocr.smartjavaai.model.plate.translator;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.util.NDImageUtils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import com.xindao.ocr.smartjavaai.entity.PlateResult;
import java.util.ArrayList;
import java.util.List;
/**
 * @author dwj
 */
public class CRNNPlateRecTranslator implements Translator<Image, PlateResult> {
    private static final String plateName = "#京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新学警港澳挂使领民航危0123456789ABCDEFGHJKLMNPQRSTUVWXYZ险品";
    private static final String[] plateColors = {"黑色", "蓝色", "绿色", "白色", "黄色"};
    private static final float MEAN = 0.588f;
    private static final float STD = 0.193f;
    @Override
    public NDList processInput(TranslatorContext ctx, Image input) {
        NDManager manager = ctx.getNDManager();
        // Resize to (168, 48)
        NDArray array = input.toNDArray(manager, Image.Flag.COLOR);
        array = NDImageUtils.resize(array, 168, 48);
        // Normalize
        array = array.toType(DataType.FLOAT32, false)
                .div(255f)
                .sub(MEAN)
                .div(STD);
        // HWC to CHW
        array = array.transpose(2, 0, 1);
        array = array.expandDims(0); // batch dimension
        return new NDList(array);
    }
    @Override
    public PlateResult processOutput(TranslatorContext ctx, NDList list) {
        NDArray plateOutput = list.get(0);  // shape: [1, T, num_classes]
        NDArray colorOutput = list.get(1);  // shape: [1, num_colors]
        int[] plateIdx = plateOutput.argMax(-1)
                .toType(DataType.INT32, false)
                .toIntArray();
        int colorIdx = colorOutput.argMax(1).toType(DataType.INT32, false).toIntArray()[0];
        String plateNo = decodePlate(plateIdx);
        String plateColor = plateColors[colorIdx];
        return new PlateResult(plateNo, plateColor);
    }
    private String decodePlate(int[] preds) {
        int pre = 0;
        List<Integer> newPreds = new ArrayList<>();
        for (int idx : preds) {
            if (idx != 0 && idx != pre) {
                newPreds.add(idx);
            }
            pre = idx;
        }
        StringBuilder sb = new StringBuilder();
        for (int i : newPreds) {
            if (i >= 0 && i < plateName.length()) {
                sb.append(plateName.charAt(i));
            }
        }
        return sb.toString();
    }
    @Override
    public Batchifier getBatchifier() {
        return null; // éžæ‰¹é‡ä»»åŠ¡
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/translator/Yolo5PlateDetectTranslator.java
对比新文件
@@ -0,0 +1,194 @@
package com.xindao.ocr.smartjavaai.model.plate.translator;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.output.BoundingBox;
import ai.djl.modality.cv.output.DetectedObjects;
import ai.djl.modality.cv.output.Landmark;
import ai.djl.modality.cv.output.Point;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDArrays;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import cn.smartjavaai.common.utils.LetterBoxUtils;
import cn.smartjavaai.common.utils.NMSUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
 * @author dwj
 */
public class Yolo5PlateDetectTranslator implements Translator<Image, DetectedObjects> {
    private int inputSize = 640;
    private float minConfThreshold = 0.3f;
    private float iouThreshold = 0.5f;
    private float confThreshold = 0;
    private int imageWidth;
    private int imageHeight;
    private int topK;
    public Yolo5PlateDetectTranslator(Map<String, ?> arguments) {
        confThreshold =
                arguments.containsKey("confThreshold")
                        ? Integer.parseInt(arguments.get("confThreshold").toString())
                        : 0.3f;
        iouThreshold =
                arguments.containsKey("iouThreshold")
                        ? Integer.parseInt(arguments.get("iouThreshold").toString())
                        : 0.5f;
        topK = arguments.containsKey("topk")
                ? Integer.parseInt(arguments.get("topk").toString())
                : 100;
    }
    @Override
    public NDList processInput(TranslatorContext ctx, Image input) {
        NDManager manager = ctx.getNDManager();
        NDArray array = input.toNDArray(manager, Image.Flag.COLOR);
        imageWidth = (int) array.getShape().get(1);
        imageHeight = (int) array.getShape().get(0);
        //Letter box resize 640x640 with padding (保持比例,补边缘)
        LetterBoxUtils.ResizeResult letterBoxResult = LetterBoxUtils.letterbox(manager, array, inputSize, inputSize, 114f, LetterBoxUtils.PaddingPosition.CENTER);
        ctx.setAttachment("letterBoxResult", letterBoxResult);
        array = letterBoxResult.image;
        // è½¬ä¸º float32 ä¸”归一化到 0~1
        array = array.toType(DataType.FLOAT32, false).div(255f); // HWC
        // HWC -> CHW
        array = array.transpose(2, 0, 1); // CHW
        return new NDList(array.expandDims(0));
    }
    @Override
    public DetectedObjects processOutput(TranslatorContext ctx, NDList list) {
        NDManager manager = ctx.getNDManager();
        LetterBoxUtils.ResizeResult letterBoxResult = (LetterBoxUtils.ResizeResult)ctx.getAttachment("letterBoxResult");
        //[x_center, y_center, w, h, obj_conf, 8个关键点, class1_conf, class2_conf]
        //目标置信度 obj_conf 5:13 å…³é”®ç‚¹ [13:15]分类得分:单层车牌 / åŒå±‚车牌
        NDArray dets = list.singletonOrThrow();
        //置信度过滤 (1,25200, 15)
        NDArray dets0 = dets.get(0);
        NDArray conf = dets0.get(":, 4"); // shape [N]
        NDArray mask = conf.gt(minConfThreshold);
        //筛选出符合条件的框(17,15)
        NDArray detsFiltered = dets0.get(mask); // ç­›æŽ‰ä½Žç½®ä¿¡åº¦
        //把分类得分 [13:15] * ç½®ä¿¡åº¦ [4:5] åšè”合概率
        NDArray clsLogits = detsFiltered.get(":, 13:15"); // (N, 2)
        NDArray confFiltered = detsFiltered.get(":, 4").reshape(-1, 1); // (N, 1)
        clsLogits = clsLogits.mul(confFiltered); // (N, 2),变成 obj_conf * class_conf
        NDArray jointScore = clsLogits.max(new int[]{1}); // shape (N,)
        // è”合过滤
        NDArray jointMask = jointScore.gt(confThreshold);
        detsFiltered = detsFiltered.get(jointMask);
        if (detsFiltered.isEmpty()) {
            return new DetectedObjects(Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
        }
        clsLogits = clsLogits.get(jointMask);
        //中心点框 [x,y,w,h] âž” å·¦ä¸Šå³ä¸‹ [x1,y1,x2,y2]
        NDArray xywh = detsFiltered.get(":, 0:4"); // (N, 4)
        NDArray halfWH = xywh.get(":, 2:4").div(2); // (N, 2)
        NDArray xy1 = xywh.get(":, 0:2").sub(halfWH); // (N, 2)
        NDArray xy2 = xywh.get(":, 0:2").add(halfWH); // (N, 2)
        NDArray boxes = NDArrays.concat(new NDList(xy1, xy2), 1); // (N, 4)
        // åˆ†ç±»å¾—分最大值:score (N, 1),对应类别 index (N, 1)
        NDArray scores = clsLogits.max(new int[]{1}, true); // (N, 1)
        NDArray indices = clsLogits.argMax(1).reshape(-1, 1).toType(DataType.FLOAT32, false); // (N, 1)
        // å…³é”®ç‚¹åæ ‡ [5:13]
        NDArray keyPoints = detsFiltered.get(":, 5:13"); // (N, 8)
        // æ‹¼æˆæœ€ç»ˆç»“果:(x1, y1, x2, y2, score, 8关键点, index)
        NDArray output = NDArrays.concat(new NDList(boxes, scores, keyPoints, indices), 1); // (N, 14)
        // NMS è¿‡æ»¤æŽ‰é‡å æ¡†
        int[] keepIndices = NMSUtils.nms(boxes, scores.squeeze(), iouThreshold); // scores.squeeze() âž (N,)】
        if (keepIndices.length == 0) {
            return new DetectedObjects(Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
        }
        NDArray kept = output.get(manager.create(keepIndices));
        // å¦‚果超过 topK,则截断
        if (keepIndices.length > topK) {
            int[] topkIndices = new int[topK];
            System.arraycopy(keepIndices, 0, topkIndices, 0, topK);
            keepIndices = topkIndices;
        }
        //恢复原图坐标(除回比例,减掉 padding)
        NDArray restored = LetterBoxUtils.restoreBox(kept, letterBoxResult.r, letterBoxResult.left, letterBoxResult.top, 5,8);
        List<String> classNames = new ArrayList<>();
        List<Double> probabilities = new ArrayList<>();
        List<BoundingBox> boundingBoxes = new ArrayList<>();
        float[] flatData = restored.toFloatArray();
        long[] shape = restored.getShape().getShape(); // æ¯”如 (N, 14)
        int rows = (int) shape[0];
        int cols = (int) shape[1];
        // æŠŠä¸€ç»´æ•°ç»„重组为二维数组
        float[][] data = new float[rows][cols];
        for (int i = 0; i < rows; i++) {
            System.arraycopy(flatData, i * cols, data[i], 0, cols);
        }
        for (float[] row : data) {
            // row结构:(x1, y1, x2, y2, score, kp1,..., kp8, classIndex)
            float x1 = row[0];
            float y1 = row[1];
            float x2 = row[2];
            float y2 = row[3];
            float score = row[4];
            int classIndex = (int) row[13];
            double prob = score;
            String className = classIndex == 0 ? "single" : "double";
            // è½¬ç›¸å¯¹åæ ‡ï¼ŒDJL的Rectangle用比例坐标(0~1)
            double rectX = x1 / imageWidth;
            double rectY = y1 / imageHeight;
            double rectW = (x2 - x1) / imageWidth;
            double rectH = (y2 - y1) / imageHeight;
            // æž„建 Polygon å››ä¸ªè§’点
            List<Point> pointsSrc = new ArrayList<>();
            pointsSrc.add(new Point(row[5], row[6]));
            pointsSrc.add(new Point(row[7], row[8]));
            pointsSrc.add(new Point(row[9], row[10]));
            pointsSrc.add(new Point(row[11], row[12]));
            Landmark box = new Landmark(rectX, rectY, rectW, rectH, pointsSrc);
            classNames.add(className);
            probabilities.add(prob);
            boundingBoxes.add(box);
        }
        DetectedObjects detectedObjects = new DetectedObjects(classNames, probabilities, boundingBoxes);
        return detectedObjects;
    }
    @Override
    public Batchifier getBatchifier() {
        return null;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/translator/Yolov7PlateDetectTranslator.java
对比新文件
@@ -0,0 +1,192 @@
package com.xindao.ocr.smartjavaai.model.plate.translator;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.output.BoundingBox;
import ai.djl.modality.cv.output.DetectedObjects;
import ai.djl.modality.cv.output.Landmark;
import ai.djl.modality.cv.output.Point;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDArrays;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import cn.smartjavaai.common.utils.LetterBoxUtils;
import cn.smartjavaai.common.utils.NMSUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * @author dwj
 */
public class Yolov7PlateDetectTranslator implements Translator<Image, DetectedObjects> {
    private int inputSize = 640;
    private float minConfThreshold = 0.3f;
    private float iouThreshold = 0.5f;
    private float confThreshold = 0;
    private int imageWidth;
    private int imageHeight;
    private int topK;
    private LetterBoxUtils.ResizeResult letterBoxResult;
    public Yolov7PlateDetectTranslator(Map<String, ?> arguments) {
        confThreshold =
                arguments.containsKey("confThreshold")
                        ? Integer.parseInt(arguments.get("confThreshold").toString())
                        : 0.3f;
        iouThreshold =
                arguments.containsKey("iouThreshold")
                        ? Integer.parseInt(arguments.get("iouThreshold").toString())
                        : 0.5f;
        topK = arguments.containsKey("topk")
                ? Integer.parseInt(arguments.get("topk").toString())
                : 100;
    }
    @Override
    public NDList processInput(TranslatorContext ctx, Image input) {
        NDManager manager = ctx.getNDManager();
        NDArray array = input.toNDArray(manager, Image.Flag.COLOR);
        imageWidth = (int) array.getShape().get(1);
        imageHeight = (int) array.getShape().get(0);
        //Letter box resize 640x640 with padding (保持比例,补边缘)
        letterBoxResult = LetterBoxUtils.letterbox(manager, array, inputSize, inputSize, 114f, LetterBoxUtils.PaddingPosition.CENTER);
        array = letterBoxResult.image;
        // è½¬ä¸º float32 ä¸”归一化到 0~1
        array = array.toType(DataType.FLOAT32, false).div(255f); // HWC
        // HWC -> CHW
        array = array.transpose(2, 0, 1); // CHW
        return new NDList(array.expandDims(0));
    }
    @Override
    public DetectedObjects processOutput(TranslatorContext ctx, NDList list) {
        NDManager manager = ctx.getNDManager();
        int num_cls = 2;
        //[x_center, y_center, w, h, obj_conf, class1_conf, class2_conf,8个关键点]
        //目标置信度 obj_conf 5:13 å…³é”®ç‚¹ [13:15]分类得分:单层车牌 / åŒå±‚车牌
        NDArray dets = list.singletonOrThrow();
        //置信度过滤 (1,25200, 15)
        NDArray dets0 = dets.get(0);
        NDArray conf = dets0.get(":, 4"); // shape [N]
        NDArray mask = conf.gt(minConfThreshold);
        //筛选出符合条件的框(17,15)
        NDArray detsFiltered = dets0.get(mask); // ç­›æŽ‰ä½Žç½®ä¿¡åº¦
        //把分类得分 [5:7] * ç½®ä¿¡åº¦ [4:5] åšè”合概率
        NDArray clsLogits = detsFiltered.get(":, 5:7"); // (N, 2)
        NDArray confFiltered = detsFiltered.get(":, 4").reshape(-1, 1); // (N, 1)
        clsLogits = clsLogits.mul(confFiltered); // (N, 2),变成 obj_conf * class_conf
        NDArray jointScore = clsLogits.max(new int[]{1}); // shape (N,)
        // è”合过滤
        NDArray jointMask = jointScore.gt(confThreshold);
        detsFiltered = detsFiltered.get(jointMask);
        clsLogits = clsLogits.get(jointMask);
        //中心点框 [x,y,w,h] âž” å·¦ä¸Šå³ä¸‹ [x1,y1,x2,y2]
        NDArray xywh = detsFiltered.get(":, 0:4"); // (N, 4)
        NDArray halfWH = xywh.get(":, 2:4").div(2); // (N, 2)
        NDArray xy1 = xywh.get(":, 0:2").sub(halfWH); // (N, 2)
        NDArray xy2 = xywh.get(":, 0:2").add(halfWH); // (N, 2)
        NDArray boxes = NDArrays.concat(new NDList(xy1, xy2), 1); // (N, 4)
        // åˆ†ç±»å¾—分最大值:score (N, 1),对应类别 index (N, 1)
        NDArray scores = clsLogits.max(new int[]{1}, true); // (N, 1)
        NDArray indices = clsLogits.argMax(1).reshape(-1, 1).toType(DataType.FLOAT32, false); // (N, 1)
        // å…³é”®ç‚¹åæ ‡ [7,8,10,11,13,14,16,17]
        NDArray keyPoints = NDArrays.concat(new NDList(
                detsFiltered.get(":, 7:8"),
                detsFiltered.get(":, 8:9"),
                detsFiltered.get(":, 10:11"),
                detsFiltered.get(":, 11:12"),
                detsFiltered.get(":, 13:14"),
                detsFiltered.get(":, 14:15"),
                detsFiltered.get(":, 16:17"),
                detsFiltered.get(":, 17:18")
        ), 1); // æ‹¼æˆ (N, 8)
        // æ‹¼æˆæœ€ç»ˆç»“果:(x1, y1, x2, y2, score, 8关键点, index)
        NDArray output = NDArrays.concat(new NDList(boxes, scores, keyPoints, indices), 1); // (N, 14)
        // NMS è¿‡æ»¤æŽ‰é‡å æ¡†
        int[] keepIndices = NMSUtils.nms(boxes, scores.squeeze(), iouThreshold); // scores.squeeze() âž (N,)
        NDArray kept = output.get(manager.create(keepIndices));
        // å¦‚果超过 topK,则截断
        if (keepIndices.length > topK) {
            int[] topkIndices = new int[topK];
            System.arraycopy(keepIndices, 0, topkIndices, 0, topK);
            keepIndices = topkIndices;
        }
        //恢复原图坐标(除回比例,减掉 padding)
        NDArray restored = LetterBoxUtils.restoreBox(kept, letterBoxResult.r, letterBoxResult.left, letterBoxResult.top, 5,8);
        List<String> classNames = new ArrayList<>();
        List<Double> probabilities = new ArrayList<>();
        List<BoundingBox> boundingBoxes = new ArrayList<>();
        float[] flatData = restored.toFloatArray();
        long[] shape = restored.getShape().getShape(); // æ¯”如 (N, 14)
        int rows = (int) shape[0];
        int cols = (int) shape[1];
        // æŠŠä¸€ç»´æ•°ç»„重组为二维数组
        float[][] data = new float[rows][cols];
        for (int i = 0; i < rows; i++) {
            System.arraycopy(flatData, i * cols, data[i], 0, cols);
        }
        for (float[] row : data) {
            // row结构:(x1, y1, x2, y2, score, kp1,..., kp8, classIndex)
            float x1 = row[0];
            float y1 = row[1];
            float x2 = row[2];
            float y2 = row[3];
            float score = row[4];
            int classIndex = (int) row[13];
            double prob = score;
            String className = classIndex == 0 ? "single" : "double";
            // è½¬ç›¸å¯¹åæ ‡ï¼ŒDJL的Rectangle用比例坐标(0~1)
            double rectX = x1 / imageWidth;
            double rectY = y1 / imageHeight;
            double rectW = (x2 - x1) / imageWidth;
            double rectH = (y2 - y1) / imageHeight;
            // æž„建 Polygon å››ä¸ªè§’点
            List<Point> pointsSrc = new ArrayList<>();
            pointsSrc.add(new Point(row[5], row[6]));
            pointsSrc.add(new Point(row[7], row[8]));
            pointsSrc.add(new Point(row[9], row[10]));
            pointsSrc.add(new Point(row[11], row[12]));
            Landmark box = new Landmark(rectX, rectY, rectW, rectH, pointsSrc);
            classNames.add(className);
            probabilities.add(prob);
            boundingBoxes.add(box);
        }
        DetectedObjects detectedObjects = new DetectedObjects(classNames, probabilities, boundingBoxes);
        return detectedObjects;
    }
    @Override
    public Batchifier getBatchifier() {
        return null;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/plate/translator/Yolov8PlateDetectTranslator.java
对比新文件
@@ -0,0 +1,188 @@
package com.xindao.ocr.smartjavaai.model.plate.translator;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.output.BoundingBox;
import ai.djl.modality.cv.output.DetectedObjects;
import ai.djl.modality.cv.output.Rectangle;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDArrays;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import cn.smartjavaai.common.utils.LetterBoxUtils;
import cn.smartjavaai.common.utils.NMSUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * @author dwj
 */
public class Yolov8PlateDetectTranslator implements Translator<Image, DetectedObjects> {
    private int inputSize = 640;
    private float minConfThreshold = 0.3f;
    private float iouThreshold = 0.5f;
    private float confThreshold = 0;
    private int imageWidth;
    private int imageHeight;
    private int topK;
    private LetterBoxUtils.ResizeResult letterBoxResult;
    public Yolov8PlateDetectTranslator(Map<String, ?> arguments) {
        confThreshold =
                arguments.containsKey("confThreshold")
                        ? Integer.parseInt(arguments.get("confThreshold").toString())
                        : 0.3f;
        iouThreshold =
                arguments.containsKey("iouThreshold")
                        ? Integer.parseInt(arguments.get("iouThreshold").toString())
                        : 0.5f;
        topK = arguments.containsKey("topk")
                ? Integer.parseInt(arguments.get("topk").toString())
                : 100;
    }
    @Override
    public NDList processInput(TranslatorContext ctx, Image input) {
        NDManager manager = ctx.getNDManager();
        NDArray array = input.toNDArray(manager, Image.Flag.COLOR);
        imageWidth = (int) array.getShape().get(1);
        imageHeight = (int) array.getShape().get(0);
        //Letter box resize 640x640 with padding (保持比例,补边缘)
        letterBoxResult = LetterBoxUtils.letterbox(manager, array, inputSize, inputSize, 114f, LetterBoxUtils.PaddingPosition.CENTER);
        array = letterBoxResult.image;
        // è½¬ä¸º float32 ä¸”归一化到 0~1
        array = array.toType(DataType.FLOAT32, false).div(255f); // HWC
        // HWC -> CHW
        array = array.transpose(2, 0, 1); // CHW
        return new NDList(array.expandDims(0));
    }
    @Override
    public DetectedObjects processOutput(TranslatorContext ctx, NDList list) {
        NDManager manager = ctx.getNDManager();
        NDArray preds = list.get(0); // shape: (1, 6, 8400)
        preds = preds.squeeze(0).transpose(1, 0); // shape: (8400, 6)
        // preds shape: (8400, 6)
        NDArray classScores = preds.get(":, 4:6"); // shape: (8400, 2)
        // èŽ·å–æ¯è¡Œæœ€å¤§å€¼ï¼ˆå¯¹åº” Python çš„ .amax(1))
        NDArray maxScores = classScores.max(new int[]{1}); // shape: (8400,)
        // æž„造 mask:score > conf
        NDArray confMask = maxScores.gt(minConfThreshold); // shape: (8400,)
        // åº”用 mask ç­›é€‰
        preds = preds.get(confMask); // shape: (N_filtered, 6)
        if (preds.isEmpty()) {
            return null;
        }
        // æå– box (xywh),转换为 xyxy
        NDArray boxes = preds.get(":, 0:4"); // shape: (N, 4)
        boxes = xywh2xyxy(boxes); // è‡ªå®šä¹‰å‡½æ•°ï¼šcenter xywh -> xyxy
        // 1. å¾—分和类别索引
        NDArray scoresAndClasses = preds.get(":, 4:6");  // shape (num, 2)
        NDArray scores = scoresAndClasses.max(new int[]{1}, true);  // keepDim = true
        NDArray index = scoresAndClasses.argMax(1).expandDims(1);  // æœ€å¤§å€¼ç´¢å¼•,类别,shape (num, 1)
        // 4. æ‹¼æŽ¥
        NDArray result = NDArrays.concat(new NDList(boxes, scores, index), 1);  // åœ¨åˆ—方向拼接
        // NMS è¿‡æ»¤æŽ‰é‡å æ¡†
        int[] keepIndices = NMSUtils.nms(boxes, scores.squeeze(), iouThreshold); // scores.squeeze() âž (N,)
        NDArray kept = result.get(manager.create(keepIndices));
        // å¦‚果超过 topK,则截断
        if (keepIndices.length > topK) {
            int[] topkIndices = new int[topK];
            System.arraycopy(keepIndices, 0, topkIndices, 0, topK);
            keepIndices = topkIndices;
        }
        //恢复原图坐标(除回比例,减掉 padding)
        NDArray restored = LetterBoxUtils.restoreBox(kept, letterBoxResult.r, letterBoxResult.left, letterBoxResult.top, 5,0);
        List<String> classNames = new ArrayList<>();
        List<Double> probabilities = new ArrayList<>();
        List<BoundingBox> boundingBoxes = new ArrayList<>();
        float[] flatData = restored.toFloatArray();
        long[] shape = restored.getShape().getShape(); // æ¯”如 (N, 14)
        int rows = (int) shape[0];
        int cols = (int) shape[1];
        // æŠŠä¸€ç»´æ•°ç»„重组为二维数组
        float[][] data = new float[rows][cols];
        for (int i = 0; i < rows; i++) {
            System.arraycopy(flatData, i * cols, data[i], 0, cols);
        }
        for (float[] row : data) {
            // row结构:(x1, y1, x2, y2, score, classIndex)
            float x1 = row[0];
            float y1 = row[1];
            float x2 = row[2];
            float y2 = row[3];
            float score = row[4];
            int classIndex = (int) row[5];
            double prob = score;
            String className = classIndex == 0 ? "single" : "double";
            // è½¬ç›¸å¯¹åæ ‡ï¼ŒDJL的Rectangle用比例坐标(0~1)
            double rectX = x1 / imageWidth;
            double rectY = y1 / imageHeight;
            double rectW = (x2 - x1) / imageWidth;
            double rectH = (y2 - y1) / imageHeight;
            // æž„建 Polygon å››ä¸ªè§’点
//            List<Point> pointsSrc = new ArrayList<>();
//            pointsSrc.add(new Point(row[5], row[6]));
//            pointsSrc.add(new Point(row[7], row[8]));
//            pointsSrc.add(new Point(row[9], row[10]));
//            pointsSrc.add(new Point(row[11], row[12]));
            Rectangle rectangle = new Rectangle(rectX, rectY, rectW, rectH);
            classNames.add(className);
            probabilities.add(prob);
            boundingBoxes.add(rectangle);
        }
        DetectedObjects detectedObjects = new DetectedObjects(classNames, probabilities, boundingBoxes);
        return detectedObjects;
    }
    @Override
    public Batchifier getBatchifier() {
        return null;
    }
    public static NDArray xywh2xyxy(NDArray xywh) {
        NDArray x = xywh.get(":, 0");
        NDArray y = xywh.get(":, 1");
        NDArray w = xywh.get(":, 2").div(2);
        NDArray h = xywh.get(":, 3").div(2);
        NDArray x1 = x.sub(w);
        NDArray y1 = y.sub(h);
        NDArray x2 = x.add(w);
        NDArray y2 = y.add(h);
        return NDArrays.stack(new NDList(x1, y1, x2, y2), 1);
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/table/CommonTableStructureModel.java
对比新文件
@@ -0,0 +1,163 @@
package com.xindao.ocr.smartjavaai.model.table;
import ai.djl.MalformedModelException;
import ai.djl.engine.Engine;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.ImageFactory;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ModelNotFoundException;
import ai.djl.repository.zoo.ModelZoo;
import ai.djl.repository.zoo.ZooModel;
import cn.smartjavaai.common.entity.R;
import cn.smartjavaai.common.pool.PredictorFactory;
import cn.smartjavaai.common.utils.FileUtils;
import cn.smartjavaai.common.utils.ImageUtils;
import cn.smartjavaai.common.utils.OpenCVUtils;
import com.xindao.ocr.smartjavaai.config.TableStructureConfig;
import com.xindao.ocr.smartjavaai.entity.TableStructureResult;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.table.criteria.StructureCriteriaFactory;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.opencv.core.Mat;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Objects;
/**
 * è¡¨æ ¼ç»“构模型
 * @author dwj
 */
@Slf4j
public class CommonTableStructureModel implements TableStructureModel{
    private ZooModel<Image, TableStructureResult> model;
    private GenericObjectPool<Predictor<Image, TableStructureResult>> predictorPool;
    @Override
    public void loadModel(TableStructureConfig config) {
        if(StringUtils.isBlank(config.getModelPath())){
            throw new OcrException("modelPath is null");
        }
        Criteria<Image, TableStructureResult> criteria = StructureCriteriaFactory.createCriteria(config);
        try{
            model = ModelZoo.loadModel(criteria);
            // åˆ›å»ºæ± å­ï¼šæ¯ä¸ªçº¿ç¨‹ç‹¬äº« Predictor
            this.predictorPool = new GenericObjectPool<>(new PredictorFactory<>(model));
            int predictorPoolSize = config.getPredictorPoolSize();
            if(config.getPredictorPoolSize() <= 0){
                predictorPoolSize = Runtime.getRuntime().availableProcessors(); // é»˜è®¤ç­‰äºŽCPU核心数
            }
            predictorPool.setMaxTotal(predictorPoolSize);
            log.debug("当前设备: " + model.getNDManager().getDevice());
            log.debug("当前引擎: " + Engine.getInstance().getEngineName());
            log.debug("模型推理器线程池最大数量: " + predictorPoolSize);
        } catch (IOException | ModelNotFoundException | MalformedModelException e) {
            throw new OcrException("表格结构识别模型加载失败", e);
        }
    }
    @Override
    public R<TableStructureResult> detect(BufferedImage image) {
        if(!ImageUtils.isImageValid(image)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(image));
            return detect(img);
        } catch (Exception e) {
            throw new OcrException(e);
        } finally {
            if(Objects.nonNull(img)){
                ((Mat)img.getWrappedImage()).release();
            }
        }
    }
    @Override
    public R<TableStructureResult> detect(String imagePath) {
        if(!FileUtils.isFileExists(imagePath)){
            return R.fail(R.Status.FILE_NOT_FOUND);
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            return detect(img);
        } catch (IOException e) {
            throw new OcrException("无效的图片", e);
        } finally {
            if (Objects.nonNull(img)){
                ((Mat)img.getWrappedImage()).release();
            }
        }
    }
    @Override
    public R<TableStructureResult> detect(byte[] imageData) {
        if(Objects.isNull(imageData)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        try {
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageData));
            return detect(image);
        } catch (IOException e) {
            throw new OcrException("错误的图像", e);
        }
    }
    @Override
    public R<TableStructureResult> detect(Image image) {
        Predictor<Image, TableStructureResult> predictor = null;
        try {
            predictor = predictorPool.borrowObject();
            TableStructureResult result = predictor.predict(image);
            return R.ok(result);
        } catch (Exception e) {
            throw new OcrException("OCR检测错误", e);
        }finally {
            if (predictor != null) {
                try {
                    predictorPool.returnObject(predictor); //归还
                } catch (Exception e) {
                    log.warn("归还Predictor失败", e);
                    try {
                        predictor.close(); // å½’还失败才销毁
                    } catch (Exception ex) {
                        log.error("关闭Predictor失败", ex);
                    }
                }
            }
        }
    }
    @Override
    public GenericObjectPool<Predictor<Image, TableStructureResult>> getPool() {
        return predictorPool;
    }
    @Override
    public void close() throws Exception {
        try {
            if (predictorPool != null) {
                predictorPool.close();
            }
        } catch (Exception e) {
            log.warn("关闭 predictorPool å¤±è´¥", e);
        }
        try {
            if (model != null) {
                model.close();
            }
        } catch (Exception e) {
            log.warn("关闭 model å¤±è´¥", e);
        }
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/table/TableRecognizer.java
对比新文件
@@ -0,0 +1,485 @@
package com.xindao.ocr.smartjavaai.model.table;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.ImageFactory;
import cn.smartjavaai.common.entity.DetectionRectangle;
import cn.smartjavaai.common.entity.R;
import cn.smartjavaai.common.utils.FileUtils;
import cn.smartjavaai.common.utils.ImageUtils;
import cn.smartjavaai.common.utils.OpenCVUtils;
import com.xindao.ocr.smartjavaai.config.OcrRecOptions;
import com.xindao.ocr.smartjavaai.entity.OcrBox;
import com.xindao.ocr.smartjavaai.entity.OcrInfo;
import com.xindao.ocr.smartjavaai.entity.OcrItem;
import com.xindao.ocr.smartjavaai.entity.TableStructureResult;
import com.xindao.ocr.smartjavaai.exception.OcrException;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import com.xindao.ocr.smartjavaai.model.common.direction.OcrDirectionModel;
import com.xindao.ocr.smartjavaai.model.common.recognize.OcrCommonRecModel;
import com.xindao.ocr.smartjavaai.utils.ConvertHtml2Excel;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.opencv.core.Mat;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Paths;
import java.util.*;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
/**
 * è¡¨æ ¼å†…容识别器
 * @author dwj
 */
@Slf4j
public class TableRecognizer {
    private OcrCommonDetModel textDetector;
    private TableStructureModel tableStructureModel;
    private OcrCommonRecModel textRecModel;
    private OcrDirectionModel directionModel;
    private TableRecognizer(Builder builder) {
        this.tableStructureModel = builder.tableStructureModel;
        this.textRecModel = builder.textRecModel;
        this.directionModel = builder.directionModel;
        this.textDetector = builder.textDetector;
        textRecModel.setTextDetModel(textDetector);
        textRecModel.setDirectionModel(directionModel);
    }
    public static Builder builder() {
        return new Builder();
    }
    // é“¾å¼è®¾ç½®æ–‡æœ¬è¯†åˆ«æ¨¡åž‹
    public TableRecognizer withTextRecModel(OcrCommonRecModel textRecModel) {
        this.textRecModel = textRecModel;
        return this;
    }
    // é“¾å¼è®¾ç½®è¡¨æ ¼ç»“构模型
    public TableRecognizer withStructureModel(TableStructureModel tableStructureModel) {
        this.tableStructureModel = tableStructureModel;
        return this;
    }
    /**
     * è¡¨æ ¼è¯†åˆ«
     * @param image
     * @return
     */
    public R<TableStructureResult> recognize(Image image) {
        //表格结构识别
        R<TableStructureResult> result = tableStructureModel.detect(image);
        if(!result.isSuccess()){
            return R.fail(result.getCode(), result.getMessage());
        }
        //文本检测+文字识别
        boolean enableDirectionCorrect = directionModel == null ? false : true;
        OcrRecOptions options = new OcrRecOptions(enableDirectionCorrect, false);
        OcrInfo ocrInfo = textRecModel.recognize(image, options);
        List<String> tableContentList = buildTable(result.getData(), ocrInfo);
        String html = convertHtml(result.getData().getTableTagList(), tableContentList);
        result.getData().setHtml(html);
        return result;
    }
    /**
     * è¡¨æ ¼è¯†åˆ«
     * @param image
     * @return
     */
    public R<TableStructureResult> recognize(BufferedImage image) {
        if(!ImageUtils.isImageValid(image)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromImage(OpenCVUtils.image2Mat(image));
            return recognize(img);
        } catch (Exception e) {
            throw new OcrException(e);
        } finally {
            if(Objects.nonNull(img)){
                ((Mat)img.getWrappedImage()).release();
            }
        }
    }
    /**
     * è¡¨æ ¼è¯†åˆ«
     * @param imagePath
     * @return
     */
    public R<TableStructureResult> recognize(String imagePath) {
        if(!FileUtils.isFileExists(imagePath)){
            return R.fail(R.Status.FILE_NOT_FOUND);
        }
        Image img = null;
        try {
            img = ImageFactory.getInstance().fromFile(Paths.get(imagePath));
            return recognize(img);
        } catch (IOException e) {
            throw new OcrException("无效的图片", e);
        } finally {
            if (Objects.nonNull(img)){
                ((Mat)img.getWrappedImage()).release();
            }
        }
    }
    /**
     * è¡¨æ ¼è¯†åˆ«
     * @param imageData
     * @return
     */
    public R<TableStructureResult> recognize(byte[] imageData) {
        if(Objects.isNull(imageData)){
            return R.fail(R.Status.INVALID_IMAGE);
        }
        try {
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageData));
            return recognize(image);
        } catch (IOException e) {
            throw new OcrException("错误的图像", e);
        }
    }
    /**
     * ç»˜åˆ¶è¡¨æ ¼
     * @param tableStructureResult
     * @param image
     * @param savePath
     */
    public void drawTable(TableStructureResult tableStructureResult, BufferedImage image, String savePath){
        if(Objects.isNull(tableStructureResult) || CollectionUtils.isEmpty(tableStructureResult.getTableTagList())){
            throw new OcrException("表格结构为空");
        }
        for (int i = 0; i < tableStructureResult.getOcrItemList().size(); i++){
            OcrItem item = tableStructureResult.getOcrItemList().get(i);
            DetectionRectangle detectionRectangle = item.getOcrBox().toDetectionRectangle();
            ImageUtils.drawImageRectWithText(image, detectionRectangle, i + "", Color.RED);
        }
        ImageUtils.saveImage(image, savePath);
    }
    /**
     * ç»˜åˆ¶è¡¨æ ¼
     * @param tableStructureResult
     * @param image
     * @return
     */
    public BufferedImage drawTable(TableStructureResult tableStructureResult, BufferedImage image){
        if(Objects.isNull(tableStructureResult) || CollectionUtils.isEmpty(tableStructureResult.getTableTagList())){
            throw new OcrException("表格结构为空");
        }
        for (int i = 0; i < tableStructureResult.getOcrItemList().size(); i++){
            OcrItem item = tableStructureResult.getOcrItemList().get(i);
            DetectionRectangle detectionRectangle = item.getOcrBox().toDetectionRectangle();
            ImageUtils.drawImageRectWithText(image, detectionRectangle, i + "", Color.RED);
        }
        return image;
    }
    /**
     * åˆ é™¤ HTML ä¸­ç¬¬ä¸€ä¸ª <style> ... </style> æ®µè½
     * @param html åŽŸå§‹ HTML
     * @return åŽ»æŽ‰ <style> çš„ HTML
     */
    public static String removeStyleBlock(String html) {
        String lowerHtml = html.toLowerCase();
        int styleStart = lowerHtml.indexOf("<style");
        if (styleStart == -1) {
            return html; // æ²¡æœ‰ style,返回原文
        }
        int styleEnd = lowerHtml.indexOf("</style>", styleStart);
        if (styleEnd == -1) {
            return html; // æ²¡é—­åˆæ ‡ç­¾ï¼Œä¸å¤„理
        }
        styleEnd += "</style>".length();
        // åŽ»æŽ‰ style å—
        return html.substring(0, styleStart) + html.substring(styleEnd);
    }
    /**
     * å¯¼å‡º Excel
     * @param html
     * @param out
     */
    public void exportExcel(String html, OutputStream out){
        String content = removeStyleBlock(html);
        content = content.replace("<html><body>", "");
        content = content.replace("</body></html>", "");
        try (HSSFWorkbook workbook = ConvertHtml2Excel.table2Excel(content)){
            workbook.write(out);
            out.flush();
        } catch (Exception e) {
            throw new OcrException("导出excel失败,请检查表结构是否识别正确");
        }
    }
    /**
     * å¯¼å‡º Excel
     * @param html
     * @param savePath
     */
    public void exportExcel(String html, String savePath){
        String content = removeStyleBlock(html);
        content = content.replace("<html><body>", "");
        content = content.replace("</body></html>", "");
        try (HSSFWorkbook workbook = ConvertHtml2Excel.table2Excel(content)){
            workbook.write(new File(savePath));
        } catch (Exception e) {
            throw new OcrException("导出excel失败,请检查表结构是否识别正确");
        }
    }
    /**
     * æž„建表格
     * @param tableStructureResult
     * @param ocrInfo
     * @return
     */
    public List<String> buildTable(TableStructureResult tableStructureResult, OcrInfo ocrInfo) {
        // èŽ·å– Cell ä¸Ž æ–‡æœ¬æ£€æµ‹æ¡† çš„对应关系(1:N)。
        Map<Integer, List<Integer>> matched = new ConcurrentHashMap<>();
        List<OcrItem> ocrItems = ocrInfo.getOcrItemList();
        for (int i = 0; i < ocrItems.size(); i++) {
            OcrBox ocrBox = ocrItems.get(i).getOcrBox();
            int[] box_1 = {
                    (int)ocrBox.getTopLeft().getX(),
                    (int)ocrBox.getTopLeft().getY(),
                    (int)ocrBox.getBottomRight().getX(),
                    (int)ocrBox.getBottomRight().getY()
            };
            // èŽ·å–ä¸¤ä¸¤cell之间的L1距离和 1- IOU
            List<Pair<Float, Float>> distances = new ArrayList<>();
            for (OcrItem cell : tableStructureResult.getOcrItemList()) {
                OcrBox cellBox = cell.getOcrBox();
                int[] box_2 = {
                        (int)cellBox.getTopLeft().getX(),
                        (int)cellBox.getTopLeft().getY(),
                        (int)cellBox.getBottomRight().getX(),
                        (int)cellBox.getBottomRight().getY()
                };
                float distance = distance(box_1, box_2);
                float iou = 1 - computeIou(box_1, box_2);
                distances.add(Pair.of(distance, iou));
            }
            // æ ¹æ®è·ç¦»å’ŒIOU挑选最"近"的cell
            Pair<Float, Float> nearest = sorted(distances);
            // èŽ·å–æœ€å°è·ç¦»å¯¹åº”çš„ä¸‹æ ‡id,也等价于cell的下标id  ï¼ˆdistances列表是根据遍历cells生成的)
            int id = 0;
            for (int idx = 0; idx < distances.size(); idx++) {
                Pair<Float, Float> current = distances.get(idx);
                if (current.getLeft().floatValue() == nearest.getLeft().floatValue()
                        && current.getRight().floatValue() == nearest.getRight().floatValue()) {
                    id = idx;
                    break;
                }
            }
            if (!matched.containsKey(id)) {
                List<Integer> textIds = new ArrayList<>();
                textIds.add(i);
                // cell id, text id list (dt_boxes index list)
                matched.put(id, textIds);
            } else {
                matched.get(id).add(i);
            }
        }
        List<String> cell_contents = new ArrayList<>();
        List<Double> probs = new ArrayList<>();
        for (int i = 0; i < tableStructureResult.getOcrItemList().size(); i++) {
            List<Integer> textIds = matched.get(i);
            List<String> contents = new ArrayList<>();
            String content = "";
            if (textIds != null) {
                for (Integer id : textIds) {
                    contents.add(ocrItems.get(id).getText());
                }
                content = StringUtils.join(contents, " ");
            }
            cell_contents.add(content);
            probs.add(-1.0);
        }
        return cell_contents;
    }
    /**
     * è®¡ç®—欧式距离
     * Calculate L1 distance
     *
     * @param box_1
     * @param box_2
     * @return
     */
    private int distance(int[] box_1, int[] box_2) {
        int x1 = box_1[0];
        int y1 = box_1[1];
        int x2 = box_1[2];
        int y2 = box_1[3];
        int x3 = box_2[0];
        int y3 = box_2[1];
        int x4 = box_2[2];
        int y4 = box_2[3];
        int dis = Math.abs(x3 - x1) + Math.abs(y3 - y1) + Math.abs(x4 - x2) + Math.abs(y4 - y2);
        int dis_2 = Math.abs(x3 - x1) + Math.abs(y3 - y1);
        int dis_3 = Math.abs(x4 - x2) + Math.abs(y4 - y2);
        return dis + Math.min(dis_2, dis_3);
    }
    /**
     * è®¡ç®—交并比
     * computing IoU
     *
     * @param rec1: (y0, x0, y1, x1), which reflects (top, left, bottom, right)
     * @param rec2: (y0, x0, y1, x1)
     * @return scala value of IoU
     */
    private float computeIou(int[] rec1, int[] rec2) {
        // computing area of each rectangles
        int S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1]);
        int S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1]);
        // computing the sum_area
        int sum_area = S_rec1 + S_rec2;
        // find the each edge of intersect rectangle
        int left_line = Math.max(rec1[1], rec2[1]);
        int right_line = Math.min(rec1[3], rec2[3]);
        int top_line = Math.max(rec1[0], rec2[0]);
        int bottom_line = Math.min(rec1[2], rec2[2]);
        // judge if there is an intersect
        if (left_line >= right_line || top_line >= bottom_line) {
            return 0.0f;
        } else {
            float intersect = (right_line - left_line) * (bottom_line - top_line);
            return (intersect / (sum_area - intersect)) * 1.0f;
        }
    }
    /**
     * è·ç¦»æŽ’序
     * Distance sorted
     *
     * @param distances
     * @return
     */
    private Pair<Float, Float> sorted(List<Pair<Float, Float>> distances) {
        Comparator<Pair<Float, Float>> comparator =
                new Comparator<Pair<Float, Float>>() {
                    @Override
                    public int compare(Pair<Float, Float> a1, Pair<Float, Float> a2) {
                        // é¦–先根据IoU排序
                        if (a1.getRight().floatValue() > a2.getRight().floatValue()) {
                            return 1;
                        } else if (a1.getRight().floatValue() == a2.getRight().floatValue()) {
                            // ç„¶åŽæ ¹æ®L1距离排序
                            if (a1.getLeft().floatValue() > a2.getLeft().floatValue()) {
                                return 1;
                            }
                            return -1;
                        }
                        return -1;
                    }
                };
        // è·ç¦»æŽ’序
        List<Pair<Float, Float>> newDistances = new ArrayList<>();
        CollectionUtils.addAll(newDistances, new Object[distances.size()]);
        Collections.copy(newDistances, distances);
        Collections.sort(newDistances, comparator);
        return newDistances.get(0);
    }
    /**
     * ç”Ÿæˆè¡¨æ ¼html
     * Generate table html
     *
     * @param pred_structures
     * @param cell_contents
     * @return
     */
    public String convertHtml(List<String> pred_structures, List<String> cell_contents) {
        StringBuffer html = new StringBuffer();
        // æ·»åŠ ç»Ÿä¸€çš„æ ·å¼ï¼ˆå¯é€‰æ”¾åˆ°<head>中)
        html.append("<style>\n");
        html.append("table { border-collapse: collapse; }\n");
        html.append("td, th, table { border: 1px solid black; padding: 5px; }\n");
        html.append("</style>\n");
        int td_index = 0;
        for (String tag : pred_structures) {
            if (tag.contains("<td></td>")) {
                String content = cell_contents.get(td_index);
                html.append("<td>");
                html.append(content);
                html.append("</td>");
                td_index++;
                continue;
            }
            html.append(tag);
        }
        return html.toString();
    }
    public static class Builder {
        private TableStructureModel tableStructureModel;
        private OcrCommonRecModel textRecModel;
        private OcrDirectionModel directionModel;
        private OcrCommonDetModel textDetector;
        public Builder withStructureModel(TableStructureModel model) {
            this.tableStructureModel = model;
            return this;
        }
        public Builder withTextRecModel(OcrCommonRecModel model) {
            this.textRecModel = model;
            return this;
        }
        public Builder withDirectionModel(OcrDirectionModel model) {
            this.directionModel = model;
            return this;
        }
        public Builder withTextDetModel(OcrCommonDetModel model) {
            this.textDetector = model;
            return this;
        }
        public TableRecognizer build() {
            if (this.tableStructureModel == null) {
                throw new IllegalStateException("tableStructureModel æœªè®¾ç½®");
            }
            if (this.textDetector == null) {
                throw new IllegalStateException("textDetector æœªè®¾ç½®");
            }
            if (this.textRecModel == null) {
                throw new IllegalStateException("textRecModel æœªè®¾ç½®");
            }
            return new TableRecognizer(this);
        }
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/table/TableStructureModel.java
对比新文件
@@ -0,0 +1,65 @@
package com.xindao.ocr.smartjavaai.model.table;
import ai.djl.inference.Predictor;
import ai.djl.modality.cv.Image;
import cn.smartjavaai.common.entity.R;
import com.xindao.ocr.smartjavaai.config.TableStructureConfig;
import com.xindao.ocr.smartjavaai.entity.TableStructureResult;
import org.apache.commons.pool2.impl.GenericObjectPool;
import java.awt.image.BufferedImage;
/**
 * Table structure recognition model.
 * <p>
 * Every {@code detect} overload and {@link #getPool()} has a default
 * implementation that throws {@link UnsupportedOperationException}, so an
 * implementation only overrides the input kinds it supports.
 *
 * @author dwj
 */
public interface TableStructureModel extends AutoCloseable{
    /**
     * Loads the model.
     *
     * @param config the table structure model configuration
     */
    void loadModel(TableStructureConfig config);
    /**
     * Detects the table structure in a {@link BufferedImage}.
     *
     * @param image the image to analyse
     * @return the detection result
     * @throws UnsupportedOperationException by default, when not overridden
     */
    default R<TableStructureResult> detect(BufferedImage image){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects the table structure in an image file.
     *
     * @param imagePath path of the image
     * @return the detection result
     * @throws UnsupportedOperationException by default, when not overridden
     */
    default R<TableStructureResult> detect(String imagePath) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects the table structure in image bytes.
     *
     * @param imageData the image as a byte array
     * @return the detection result
     * @throws UnsupportedOperationException by default, when not overridden
     */
    default R<TableStructureResult> detect(byte[] imageData) {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Detects the table structure in a DJL image.
     *
     * @param image DJL Image
     * @return the detection result
     * @throws UnsupportedOperationException by default, when not overridden
     */
    default R<TableStructureResult> detect(Image image){
        throw new UnsupportedOperationException("默认不支持该功能");
    }
    /**
     * Returns the pool of predictors used by this model.
     *
     * @return the predictor pool
     * @throws UnsupportedOperationException by default, when not overridden
     */
    default GenericObjectPool<Predictor<Image, TableStructureResult>> getPool() {
        throw new UnsupportedOperationException("默认不支持该功能");
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/table/criteria/StructureCriteriaFactory.java
对比新文件
@@ -0,0 +1,56 @@
package com.xindao.ocr.smartjavaai.model.table.criteria;
import ai.djl.Device;
import ai.djl.modality.cv.Image;
import ai.djl.repository.zoo.Criteria;
import ai.djl.training.util.ProgressBar;
import cn.smartjavaai.common.enums.DeviceEnum;
import com.xindao.ocr.smartjavaai.config.TableStructureConfig;
import com.xindao.ocr.smartjavaai.entity.TableStructureResult;
import com.xindao.ocr.smartjavaai.enums.TableStructureModelEnum;
import com.xindao.ocr.smartjavaai.model.table.translator.TableStructTranslator;
import java.nio.file.Paths;
import java.util.Objects;
/**
 * @author dwj
 * @date 2025/7/10
 */
public class StructureCriteriaFactory {
    public static Criteria<Image, TableStructureResult> createCriteria(TableStructureConfig config) {
        Device device = null;
        if(!Objects.isNull(config.getDevice())){
            device = config.getDevice() == DeviceEnum.CPU ? Device.cpu() : Device.gpu(config.getGpuId());
        }
        Criteria<Image, TableStructureResult> criteria = null;
        if(config.getModelEnum() == TableStructureModelEnum.SLANET){
            criteria =
                    Criteria.builder()
                            .optEngine("OnnxRuntime")
                            .setTypes(Image.class, TableStructureResult.class)
                            .optModelPath(Paths.get(config.getModelPath()))
                            .optOption("removePass", "repeated_fc_relu_fuse_pass")
                            .optDevice(device)
                            .optTranslator(new TableStructTranslator())
                            .optProgress(new ProgressBar())
                            .build();
        }else if(config.getModelEnum() == TableStructureModelEnum.SLANET_PLUS){
            criteria =
                    Criteria.builder()
                            .optEngine("OnnxRuntime")
                            .setTypes(Image.class, TableStructureResult.class)
                            .optModelPath(Paths.get(config.getModelPath()))
                            .optOption("removePass", "repeated_fc_relu_fuse_pass")
                            .optDevice(device)
                            .optTranslator(new TableStructTranslator())
                            .optProgress(new ProgressBar())
                            .build();
        }
        return criteria;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/model/table/translator/TableStructTranslator.java
对比新文件
@@ -0,0 +1,191 @@
package com.xindao.ocr.smartjavaai.model.table.translator;
import ai.djl.Model;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.util.NDImageUtils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.index.NDIndex;
import ai.djl.ndarray.types.DataType;
import ai.djl.ndarray.types.Shape;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import ai.djl.util.Utils;
import cn.smartjavaai.common.entity.Point;
import com.xindao.ocr.smartjavaai.entity.OcrBox;
import com.xindao.ocr.smartjavaai.entity.OcrItem;
import com.xindao.ocr.smartjavaai.entity.TableStructureResult;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * è¡¨æ ¼è¯†åˆ«çš„前后处理
 */
public class TableStructTranslator implements Translator<Image, TableStructureResult> {
    private final int maxLength = 488;
    private int height;
    private int width;
    private float scale = 1.0f;
    private float xScale;
    private float yScale;
    private List<String> dict;
    private String beg_str = "sos";
    private String end_str = "eos";
    private List<String> td_token = new ArrayList<>();
    @Override
    public void prepare(TranslatorContext ctx) throws IOException {
        Model model = ctx.getModel();
        try (InputStream is = model.getArtifact("table_structure_dict_ch.txt").openStream()) {
            dict = Utils.readLines(is, false);
            dict.add(0,beg_str);
            if(dict.contains("<td>"))
                dict.remove("<td>");
            if(!dict.contains("<td></td>"))
                dict.add("<td></td>");
            dict.add(end_str);
        }
        td_token.add("<td>");
        td_token.add("<td");
        td_token.add("<td></td>");
    }
    @Override
    public NDList processInput(TranslatorContext ctx, Image input) {
        NDArray img = input.toNDArray(ctx.getNDManager(), Image.Flag.COLOR);
        height = input.getHeight();
        width = input.getWidth();
        img = ResizeTableImage(img, height, width, maxLength);
        img = PaddingTableImage(ctx, img, maxLength);
        img = img.transpose(2, 0, 1).div(255).flip(0);
        img = NDImageUtils.normalize(
                        img, new float[]{0.485f, 0.456f, 0.406f}, new float[]{0.229f, 0.224f, 0.225f});
        img = img.expandDims(0);
        return new NDList(img);
    }
    @Override
    public TableStructureResult processOutput(TranslatorContext ctx, NDList list) {
        NDArray bbox_preds = list.get(0);
        NDArray structure_probs = list.get(1);
        NDArray structure_idx = structure_probs.argMax(2);
        structure_probs = structure_probs.max(new int[]{2});
        List<List<String>> structure_batch_list = new ArrayList<>();
        List<List<NDArray>> bbox_batch_list = new ArrayList<>();
        List<List<NDArray>> result_score_list = new ArrayList<>();
        // get ignored tokens
        int beg_idx = dict.indexOf(beg_str);
        int end_idx = dict.indexOf(end_str);
        long batch_size = structure_idx.size(0);
        for (int batch_idx = 0; batch_idx < batch_size; batch_idx++) {
            List<String> structure_list = new ArrayList<>();
            List<NDArray> bbox_list = new ArrayList<>();
            List<NDArray> score_list = new ArrayList<>();
            long len = structure_idx.get(batch_idx).size();
            for (int idx = 0; idx < len; idx++) {
                int char_idx = (int) structure_idx.get(batch_idx).get(idx).toLongArray()[0];
                if (idx > 0 && char_idx == end_idx) {
                    break;
                }
//                if (char_idx == beg_idx || char_idx == end_idx) {
//                    continue;
//                }
                String text = dict.get(char_idx);
                if(td_token.indexOf(text)>-1){
                    NDArray bbox = bbox_preds.get(batch_idx, idx);
//                    bbox.set(new NDIndex("0::2"), bbox.get(new NDIndex("0::2")));
//                    bbox.set(new NDIndex("1::2"), bbox.get(new NDIndex("1::2")));
                    bbox_list.add(bbox);
                }
                structure_list.add(text);
                score_list.add(structure_probs.get(batch_idx, idx));
            }
            structure_batch_list.add(structure_list); // structure_str
            bbox_batch_list.add(bbox_list);
            result_score_list.add(score_list);
        }
        List<String> structure_str_list =structure_batch_list.get(0);
        List<NDArray> bbox_list = bbox_batch_list.get(0);
        List<NDArray> score_list = result_score_list.get(0);
        structure_str_list.add(0,"<html>");
        structure_str_list.add(1,"<body>");
        structure_str_list.add(2,"<table>");
        structure_str_list.add("</table>");
        structure_str_list.add("</body>");
        structure_str_list.add("</html>");
        List<OcrItem> ocrItemList = new ArrayList<>();
        for (int i = 0; i < bbox_list.size(); i++) {
            NDArray box = bbox_list.get(i);
            float[] arr = new float[4];
            arr[0] = box.get(new NDIndex("0::2")).min().toFloatArray()[0];
            arr[1] = box.get(new NDIndex("1::2")).min().toFloatArray()[0];
            arr[2] = box.get(new NDIndex("0::2")).max().toFloatArray()[0];
            arr[3] = box.get(new NDIndex("1::2")).max().toFloatArray()[0];
            Point topLeft = new Point(arr[0] * xScale * width, arr[1] * yScale * height);
            Point topRight = new Point(arr[2] * xScale * width, arr[1] * yScale * height);
            Point bottomRight = new Point(arr[2] * xScale * width, arr[3] * yScale * height);
            Point bottomLeft = new Point(arr[0] * xScale * width, arr[3] * yScale * height);
            OcrBox ocrBox = new OcrBox(topLeft, topRight, bottomRight, bottomLeft);
            //String tag = structure_str_list.get(i + 3); // å‰é¢åŠ äº†<html><body><table> æ‰€ä»¥åç§»+3
            float score = score_list.get(i).toFloatArray()[0]; // èŽ·å–æ¯ä¸ªç»“æž„token的得分
            OcrItem item = new OcrItem();
            item.setOcrBox(ocrBox);
            item.setScore(score);
            //item.setTableTag(tag);
            ocrItemList.add(item);
        }
        return new TableStructureResult(ocrItemList, structure_str_list);
    }
    @Override
    public Batchifier getBatchifier() {
        return null;
    }
    private NDArray ResizeTableImage(NDArray img, int height, int width, int maxLen) {
        int localMax = Math.max(height, width);
        float ratio = maxLen * 1.0f / localMax;
        int resize_h = (int) (height * ratio);
        int resize_w = (int) (width * ratio);
        scale = ratio;
        if(width > height){
            xScale = 1f;
            yScale = (float)width /(float)height;
        } else{
            xScale = (float)height /(float)width;
            yScale = 1f;
        }
        img = NDImageUtils.resize(img, resize_w, resize_h);
        return img;
    }
    private NDArray PaddingTableImage(TranslatorContext ctx, NDArray img, int maxLen) {
        NDArray paddingImg = ctx.getNDManager().zeros(new Shape(maxLen, maxLen, 3), DataType.UINT8);
        paddingImg.set(
                new NDIndex("0:" + img.getShape().get(0) + ",0:" + img.getShape().get(1) + ",:"), img);
        return paddingImg;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/opencv/OcrNDArrayUtils.java
对比新文件
@@ -0,0 +1,228 @@
package com.xindao.ocr.smartjavaai.opencv;
import ai.djl.ndarray.NDArray;
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Point;
import java.util.ArrayList;
import java.util.List;
/**
 * NDArray Utils
 *
 */
public class OcrNDArrayUtils {
    /**
     * Mat To MatOfPoint
     * @param mat
     * @return
     */
    public static MatOfPoint matToMatOfPoint(Mat mat) {
        int rows = mat.rows();
        MatOfPoint matOfPoint = new MatOfPoint();
        List<Point> list = new ArrayList<>();
        for (int i = 0; i < rows; i++) {
            Point point = new Point((float) mat.get(i, 0)[0], (float) mat.get(i, 1)[0]);
            list.add(point);
        }
        matOfPoint.fromList(list);
        return matOfPoint;
    }
    /**
     * float NDArray To float[][] Array
     * @param ndArray
     * @return
     */
    public static float[][] floatNDArrayToArray(NDArray ndArray) {
        int rows = (int) (ndArray.getShape().get(0));
        int cols = (int) (ndArray.getShape().get(1));
        float[][] arr = new float[rows][cols];
        float[] arrs = ndArray.toFloatArray();
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                arr[i][j] = arrs[i * cols + j];
            }
        }
        return arr;
    }
    /**
     * Mat To double[][] Array
     * @param mat
     * @return
     */
    public static double[][] matToDoubleArray(Mat mat) {
        int rows = mat.rows();
        int cols = mat.cols();
        double[][] doubles = new double[rows][cols];
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                doubles[i][j] = mat.get(i, j)[0];
            }
        }
        return doubles;
    }
    /**
     * Mat To float[][] Array
     * @param mat
     * @return
     */
    public static float[][] matToFloatArray(Mat mat) {
        int rows = mat.rows();
        int cols = mat.cols();
        float[][] floats = new float[rows][cols];
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                floats[i][j] = (float) mat.get(i, j)[0];
            }
        }
        return floats;
    }
    /**
     * Mat To byte[][] Array
     * @param mat
     * @return
     */
    public static byte[][] matToUint8Array(Mat mat) {
        int rows = mat.rows();
        int cols = mat.cols();
        byte[][] bytes = new byte[rows][cols];
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                bytes[i][j] = (byte) mat.get(i, j)[0];
            }
        }
        return bytes;
    }
    /**
     * float NDArray To float[][] Array
     * @param ndArray
     * @param cvType
     * @return
     */
    public static Mat floatNDArrayToMat(NDArray ndArray, int cvType) {
        int rows = (int) (ndArray.getShape().get(0));
        int cols = (int) (ndArray.getShape().get(1));
        Mat mat = new Mat(rows, cols, cvType);
        float[] arrs = ndArray.toFloatArray();
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                mat.put(i, j, arrs[i * cols + j]);
            }
        }
        return mat;
    }
    /**
     * float NDArray To Mat
     * @param ndArray
     * @return
     */
    public static Mat floatNDArrayToMat(NDArray ndArray) {
        int rows = (int) (ndArray.getShape().get(0));
        int cols = (int) (ndArray.getShape().get(1));
        Mat mat = new Mat(rows, cols, CvType.CV_32F);
        float[] arrs = ndArray.toFloatArray();
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                mat.put(i, j, arrs[i * cols + j]);
            }
        }
        return mat;
    }
    /**
     * uint8 NDArray To Mat
     * @param ndArray
     * @return
     */
    public static Mat uint8NDArrayToMat(NDArray ndArray) {
        int rows = (int) (ndArray.getShape().get(0));
        int cols = (int) (ndArray.getShape().get(1));
        Mat mat = new Mat(rows, cols, CvType.CV_8U);
        byte[] arrs = ndArray.toByteArray();
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                mat.put(i, j, arrs[i * cols + j]);
            }
        }
        return mat;
    }
    /**
     * float[][] Array To Mat
     * @param arr
     * @return
     */
    public static Mat floatArrayToMat(float[][] arr) {
        int rows = arr.length;
        int cols = arr[0].length;
        Mat mat = new Mat(rows, cols, CvType.CV_32F);
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                mat.put(i, j, arr[i][j]);
            }
        }
        return mat;
    }
    /**
     * byte[][] Array To Mat
     * @param arr
     * @return
     */
    public static Mat uint8ArrayToMat(byte[][] arr) {
        int rows = arr.length;
        int cols = arr[0].length;
        Mat mat = new Mat(rows, cols, CvType.CV_8U);
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                mat.put(i, j, arr[i][j]);
            }
        }
        return mat;
    }
    /**
     * List To Mat
     * @param points
     * @return
     */
    public static Mat toMat(List<ai.djl.modality.cv.output.Point> points) {
        Mat mat = new Mat(points.size(), 2, CvType.CV_32F);
        for (int i = 0; i < points.size(); i++) {
            ai.djl.modality.cv.output.Point point = points.get(i);
            mat.put(i, 0, (float) point.getX());
            mat.put(i, 1, (float) point.getY());
        }
        return mat;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/utils/ConvertHtml2Excel.java
对比新文件
@@ -0,0 +1,236 @@
package com.xindao.ocr.smartjavaai.utils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.BorderStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.ss.usermodel.VerticalAlignment;
import org.apache.poi.ss.util.CellRangeAddress;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import java.util.ArrayList;
import java.util.List;
/**
 * @Auther: xiaoqiang
 * @Date: 2020/12/9 9:16
 * @Description:
 */
public class ConvertHtml2Excel {
    /**
     * html表格转excel
     *
     * @param tableHtml å¦‚
     *            <table>
     *            ..
     *            </table>
     * @return
     */
    public static HSSFWorkbook table2Excel(String tableHtml) {
        HSSFWorkbook wb = new HSSFWorkbook();
        HSSFSheet sheet = wb.createSheet();
        List<CrossRangeCellMeta> crossRowEleMetaLs = new ArrayList<>();
        int rowIndex = 0;
        try {
            Document data = DocumentHelper.parseText(tableHtml);
            // ç”Ÿæˆè¡¨å¤´
            Element thead = data.getRootElement().element("thead");
            HSSFCellStyle titleStyle = getTitleStyle(wb);
            int ls=0;//列数
            if (thead != null) {
                List<Element> trLs = thead.elements("tr");
                for (Element trEle : trLs) {
                    HSSFRow row = sheet.createRow(rowIndex);
                    List<Element> thLs = trEle.elements("td");
                    ls=thLs.size();
                    makeRowCell(thLs, rowIndex, row, 0, titleStyle, crossRowEleMetaLs);
                    rowIndex++;
                }
            }
            // ç”Ÿæˆè¡¨ä½“
            Element tbody = data.getRootElement().element("tbody");
            HSSFCellStyle contentStyle = getContentStyle(wb);
            if (tbody != null) {
                List<Element> trLs = tbody.elements("tr");
                for (Element trEle : trLs) {
                    HSSFRow row = sheet.createRow(rowIndex);
                    List<Element> thLs = trEle.elements("th");
                    int cellIndex = makeRowCell(thLs, rowIndex, row, 0, titleStyle, crossRowEleMetaLs);
                    List<Element> tdLs = trEle.elements("td");
                    makeRowCell(tdLs, rowIndex, row, cellIndex, contentStyle, crossRowEleMetaLs);
                    rowIndex++;
                }
            }
            // åˆå¹¶è¡¨å¤´
            for (CrossRangeCellMeta crcm : crossRowEleMetaLs) {
                sheet.addMergedRegion(new CellRangeAddress(crcm.getFirstRow(), crcm.getLastRow(), crcm.getFirstCol(), crcm.getLastCol()));
                setRegionStyle(sheet, new CellRangeAddress(crcm.getFirstRow(), crcm.getLastRow(), crcm.getFirstCol(), crcm.getLastCol()),titleStyle);
            }
            for(int i=0;i<sheet.getRow(0).getPhysicalNumberOfCells();i++){
                sheet.autoSizeColumn(i, true);//设置列宽
                if(sheet.getColumnWidth(i)<255*256){
                    sheet.setColumnWidth(i, sheet.getColumnWidth(i) < 9000 ? 9000 : sheet.getColumnWidth(i));
                }else{
                    sheet.setColumnWidth(i, 15000);
                }
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        }
        return wb;
    }
    /**
     * ç”Ÿäº§è¡Œå†…容
     *
     * @return æœ€åŽä¸€åˆ—çš„cell index
     */
    /**
     * @param tdLs th或者td集合
     * @param rowIndex è¡Œå·
     * @param row POI行对象
     * @param startCellIndex
     * @param cellStyle æ ·å¼
     * @param crossRowEleMetaLs è·¨è¡Œå…ƒæ•°æ®é›†åˆ
     * @return
     */
    private static int makeRowCell(List<Element> tdLs, int rowIndex, HSSFRow row, int startCellIndex, HSSFCellStyle cellStyle,
                                   List<CrossRangeCellMeta> crossRowEleMetaLs) {
        int i = startCellIndex;
        for (int eleIndex = 0; eleIndex < tdLs.size(); i++, eleIndex++) {
            int captureCellSize = getCaptureCellSize(rowIndex, i, crossRowEleMetaLs);
            while (captureCellSize > 0) {
                for (int j = 0; j < captureCellSize; j++) {// å½“前行跨列处理(补单元格)
                    row.createCell(i);
                    i++;
                }
                captureCellSize = getCaptureCellSize(rowIndex, i, crossRowEleMetaLs);
            }
            Element thEle = tdLs.get(eleIndex);
            String val = thEle.getTextTrim();
            if (StringUtils.isBlank(val)) {
                Element e = thEle.element("a");
                if (e != null) {
                    val = e.getTextTrim();
                }
            }
            HSSFCell c = row.createCell(i);
            if (NumberUtils.isNumber(val)) {
                c.setCellValue(Double.parseDouble(val));
                c.setCellType(CellType.NUMERIC);
            } else {
                c.setCellValue(val);
            }
            int rowSpan = NumberUtils.toInt(thEle.attributeValue("rowspan"), 1);
            int colSpan = NumberUtils.toInt(thEle.attributeValue("colspan"), 1);
            c.setCellStyle(cellStyle);
            if (rowSpan > 1 || colSpan > 1) { // å­˜åœ¨è·¨è¡Œæˆ–跨列
                crossRowEleMetaLs.add(new CrossRangeCellMeta(rowIndex, i, rowSpan, colSpan));
            }
            if (colSpan > 1) {// å½“前行跨列处理(补单元格)
                for (int j = 1; j < colSpan; j++) {
                    i++;
                    row.createCell(i);
                }
            }
        }
        return i;
    }
    /**
     * è®¾ç½®åˆå¹¶å•元格的边框样式
     *
     * @param sheet
     * @param region
     * @param cs
     */
    public static void setRegionStyle(HSSFSheet sheet, CellRangeAddress region, HSSFCellStyle cs) {
        for (int i = region.getFirstRow(); i <= region.getLastRow(); i++) {
            HSSFRow row = sheet.getRow(i);
            for (int j = region.getFirstColumn(); j <= region.getLastColumn(); j++) {
                HSSFCell cell = row.getCell(j);
                cell.setCellStyle(cs);
            }
        }
    }
    /**
     * èŽ·å¾—å› rowSpan占据的单元格
     *
     * @param rowIndex è¡Œå·
     * @param colIndex åˆ—号
     * @param crossRowEleMetaLs è·¨è¡Œåˆ—元数据
     * @return å½“前行在某列需要占据单元格
     */
    private static int getCaptureCellSize(int rowIndex, int colIndex, List<CrossRangeCellMeta> crossRowEleMetaLs) {
        int captureCellSize = 0;
        for (CrossRangeCellMeta crossRangeCellMeta : crossRowEleMetaLs) {
            if (crossRangeCellMeta.getFirstRow() < rowIndex && crossRangeCellMeta.getLastRow() >= rowIndex) {
                if (crossRangeCellMeta.getFirstCol() <= colIndex && crossRangeCellMeta.getLastCol() >= colIndex) {
                    captureCellSize = crossRangeCellMeta.getLastCol() - colIndex + 1;
                }
            }
        }
        return captureCellSize;
    }
    /**
     * èŽ·å¾—æ ‡é¢˜æ ·å¼
     *
     * @param workbook
     * @return
     */
    private static HSSFCellStyle getTitleStyle(HSSFWorkbook workbook) {
        //short titlebackgroundcolor = IndexedColors.GREY_25_PERCENT.index;
        short fontSize = 12;
        String fontName = "宋体";
        HSSFCellStyle style = workbook.createCellStyle();
        style.setVerticalAlignment(VerticalAlignment.CENTER);
        style.setAlignment(HorizontalAlignment.CENTER);
        style.setBorderBottom(BorderStyle.THIN); //下边框
        style.setBorderLeft(BorderStyle.THIN);//左边框
        style.setBorderTop(BorderStyle.THIN);//上边框
        style.setBorderRight(BorderStyle.THIN);//右边框
        //style.setFillPattern(FillPatternType.SOLID_FOREGROUND);
        //style.setFillForegroundColor(titlebackgroundcolor);// èƒŒæ™¯è‰²
        HSSFFont font = workbook.createFont();
        font.setFontName(fontName);
        font.setFontHeightInPoints(fontSize);
        font.setBold(true);
        style.setFont(font);
        return style;
    }
    /**
     * èŽ·å¾—å†…å®¹æ ·å¼
     *
     * @param wb
     * @return
     */
    private static HSSFCellStyle getContentStyle(HSSFWorkbook wb) {
        short fontSize = 12;
        String fontName = "宋体";
        HSSFCellStyle style = wb.createCellStyle();
        style.setBorderBottom(BorderStyle.THIN); //下边框
        style.setBorderLeft(BorderStyle.THIN);//左边框
        style.setBorderTop(BorderStyle.THIN);//上边框
        style.setBorderRight(BorderStyle.THIN);//右边框
        HSSFFont font = wb.createFont();
        font.setFontName(fontName);
        font.setFontHeightInPoints(fontSize);
        style.setFont(font);
        style.setAlignment(HorizontalAlignment.CENTER);//水平居中
        style.setVerticalAlignment(VerticalAlignment.CENTER);//垂直居中
        style.setWrapText(true);
        return style;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/utils/CrossRangeCellMeta.java
对比新文件
@@ -0,0 +1,42 @@
package com.xindao.ocr.smartjavaai.utils;
/**
 * Describes a table cell that spans more than one row and/or column: the position of its
 * top-left corner plus the number of rows and columns it covers.
 *
 * @Auther: xiaoqiang
 * @Date: 2020/12/9 9:17
 */
public class CrossRangeCellMeta {
    /** Row index of the top-left cell. */
    private int firstRowIndex;
    /** Column index of the top-left cell. */
    private int firstColIndex;
    /** Number of rows covered. */
    private int rowSpan;
    /** Number of columns covered. */
    private int colSpan;

    /**
     * @param firstRowIndex row index of the top-left cell
     * @param firstColIndex column index of the top-left cell
     * @param rowSpan       number of rows covered
     * @param colSpan       number of columns covered
     */
    public CrossRangeCellMeta(int firstRowIndex, int firstColIndex, int rowSpan, int colSpan) {
        this.rowSpan = rowSpan;
        this.colSpan = colSpan;
        this.firstRowIndex = firstRowIndex;
        this.firstColIndex = firstColIndex;
    }

    /** @return index of the first covered row */
    public int getFirstRow() {
        return this.firstRowIndex;
    }

    /** @return index of the last covered row (inclusive) */
    public int getLastRow() {
        final int extraRows = this.rowSpan - 1;
        return this.firstRowIndex + extraRows;
    }

    /** @return index of the first covered column */
    public int getFirstCol() {
        return this.firstColIndex;
    }

    /** @return index of the last covered column (inclusive) */
    public int getLastCol() {
        final int extraCols = this.colSpan - 1;
        return this.firstColIndex + extraCols;
    }

    /** @return number of columns covered */
    public int getColSpan() {
        return this.colSpan;
    }
}
src/main/java/com/xindao/ocr/smartjavaai/utils/OcrUtils.java
对比新文件
@@ -0,0 +1,455 @@
package com.xindao.ocr.smartjavaai.utils;
import ai.djl.modality.cv.Image;
import ai.djl.modality.cv.ImageFactory;
import ai.djl.modality.cv.output.BoundingBox;
import ai.djl.modality.cv.output.DetectedObjects;
import ai.djl.modality.cv.output.Landmark;
import ai.djl.modality.cv.util.NDImageUtils;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.opencv.OpenCVImageFactory;
import cn.smartjavaai.common.entity.DetectionRectangle;
import cn.smartjavaai.common.entity.Point;
import cn.smartjavaai.common.utils.ImageUtils;
import cn.smartjavaai.common.utils.OpenCVUtils;
import cn.smartjavaai.common.utils.PointUtils;
import com.xindao.ocr.smartjavaai.entity.*;
import com.xindao.ocr.smartjavaai.enums.AngleEnum;
import com.xindao.ocr.smartjavaai.enums.PlateType;
import com.xindao.ocr.smartjavaai.opencv.OcrNDArrayUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.opencv.core.Mat;
import org.opencv.core.Scalar;
import org.opencv.imgproc.Imgproc;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.util.*;
import java.util.List;
/**
 * @author dwj
 * @date 2025/4/22
 */
@Slf4j
public class OcrUtils {
    /**
     * è½¬æ¢ä¸ºOcrBox
     * @param dt_boxes
     * @return
     */
    public static List<OcrBox> convertToOcrBox(NDList dt_boxes) {
        List<OcrBox> boxList = new ArrayList<>();
        for (NDArray box : dt_boxes) {
            float[] pointsArr = box.toFloatArray();
            OcrBox ocrBox = new OcrBox(
                    new Point(pointsArr[0], pointsArr[1]),
                    new Point(pointsArr[2], pointsArr[3]),
                    new Point(pointsArr[4], pointsArr[5]),
                    new Point(pointsArr[6], pointsArr[7])
            );
            boxList.add(ocrBox);
        }
        return boxList;
    }
    /**
     * è½¬æ¢ä¸ºOcrBox
     * @param ndLists
     * @return
     */
    public static List<List<OcrBox>> convertToOcrBox(List<NDList> ndLists) {
        if (ndLists == null || ndLists.isEmpty()) {
            return Collections.emptyList();
        }
        List<List<OcrBox>> boxLists = new ArrayList<>();
        for (NDList dt_boxes : ndLists) {
            boxLists.add(convertToOcrBox(dt_boxes));
        }
        return boxLists;
    }
    /**
     * å›¾ç‰‡æ—‹è½¬
     *
     * @param manager
     * @param image
     * @return
     */
    public static Image rotateImg(NDManager manager, Image image) {
        NDArray rotated = NDImageUtils.rotate90(image.toNDArray(manager), 1);
        return ImageFactory.getInstance().fromNDArray(rotated);
    }
    /**
     * é€†æ—¶é’ˆæ—‹è½¬å›¾ç‰‡
     *
     * @param image
     * @param times
     * @return
     */
    public static Image rotateImg(Image image, int times) {
        try (NDManager manager = NDManager.newBaseManager()) {
            NDArray rotated = NDImageUtils.rotate90(image.toNDArray(manager), times);
            return OpenCVImageFactory.getInstance().fromNDArray(rotated);
        }
    }
    /**
     * é€†æ—¶é’ˆæ—‹è½¬å›¾ç‰‡
     *
     * @param image
     * @param angleEnum
     * @return
     */
    public static Image rotateImg(Image image, AngleEnum angleEnum) {
        try (NDManager manager = NDManager.newBaseManager()) {
            int times = 0;
            switch (angleEnum) {
                case ANGLE_90:
                    times = 1;
                    break;
                case ANGLE_180:
                    times = 2;
                    break;
                case ANGLE_270:
                    times = 3;
                    break;
            }
            NDArray rotated = NDImageUtils.rotate90(image.toNDArray(manager), times);
            return OpenCVImageFactory.getInstance().fromNDArray(rotated);
        }
    }
    /**
     * è½¬æ¢ä¸ºOcrInfo
     * @param lines
     * @return
     */
    public static OcrInfo convertToOcrInfo(List<ArrayList<RotatedBoxCompX>> lines){
        if(Objects.isNull(lines) || lines.size() == 0){
            return null;
        }
        List<List<OcrItem>> lineList = new ArrayList<List<OcrItem>>();
        String fullText = "";
        for(ArrayList<RotatedBoxCompX> boxList : lines){
            List<OcrItem> line = new ArrayList<OcrItem>();
            for(RotatedBoxCompX box : boxList){
                float[] pointsArr = box.getBox().toFloatArray();
                float[] lt = Arrays.copyOfRange(pointsArr, 0, 2);
                float[] rt = Arrays.copyOfRange(pointsArr, 2, 4);
                float[] rb = Arrays.copyOfRange(pointsArr, 4, 6);
                float[] lb = Arrays.copyOfRange(pointsArr, 6, 8);
                OcrBox ocrBox = new OcrBox(new Point(lt[0], lt[1]), new Point(rt[0], rt[1]), new Point(rb[0], rb[1]), new Point(lb[0], lb[1]));
                OcrItem ocrItem = new OcrItem(ocrBox, box.getText());
                line.add(ocrItem);
                String text = box.getText();
                if(text.trim().equals(""))
                    continue;
                fullText += text + " ";
            }
            lineList.add(line);
            fullText += '\n';
        }
        return new OcrInfo(lineList, fullText);
    }
    public static OcrInfo convertRotatedBoxesToOcrItems(List<RotatedBox> rotatedBoxes) {
        OcrInfo ocrInfo = new OcrInfo();
        List<OcrItem> ocrItems = new ArrayList<>();
        StringBuilder fullText = new StringBuilder();
        for (RotatedBox rotatedBox : rotatedBoxes) {
            NDArray box = rotatedBox.getBox();
            float[] points = box.toFloatArray();
            Point topLeft = new Point(points[0], points[1]);
            Point topRight = new Point(points[2], points[3]);
            Point bottomRight = new Point(points[4], points[5]);
            Point bottomLeft = new Point(points[6], points[7]);
            OcrBox ocrBox = new OcrBox(topLeft, topRight, bottomRight, bottomLeft);
            String text = rotatedBox.getText();
            OcrItem item = new OcrItem();
            item.setOcrBox(ocrBox);
            item.setText(text);
            ocrItems.add(item);
            fullText.append(text + " ");
        }
        if (fullText.length() > 0) {
            fullText.deleteCharAt(fullText.length() - 1);
        }
        ocrInfo.setOcrItemList(ocrItems);
        ocrInfo.setFullText(fullText.toString());
        return ocrInfo;
    }
    /**
     * é€è§†å˜æ¢ + è£å‰ª
     * @param srcMat
     * @param landMarks
     * @return
     */
    public static Image transformAndCrop(Mat srcMat, List<ai.djl.modality.cv.output.Point> landMarks){
        if (landMarks == null || landMarks.size() != 4) {
            throw new IllegalArgumentException("必须提供4个关键点");
        }
        // æ­¥éª¤ 1:排序为 å·¦ä¸Šã€å³ä¸Šã€å³ä¸‹ã€å·¦ä¸‹
        List<ai.djl.modality.cv.output.Point> ordered = PointUtils.orderPoints(landMarks);
        ai.djl.modality.cv.output.Point lt = ordered.get(0);
        ai.djl.modality.cv.output.Point rt = ordered.get(1);
        ai.djl.modality.cv.output.Point rb = ordered.get(2);
        ai.djl.modality.cv.output.Point lb = ordered.get(3);
        // æ­¥éª¤ 2:计算目标图像尺寸(宽、高)
        int img_crop_width = (int) Math.max(
                PointUtils.distance(lt, rt),
                PointUtils.distance(rb, lb)
        );
        int img_crop_height = (int) Math.max(
                PointUtils.distance(lt, lb),
                PointUtils.distance(rt, rb)
        );
        // æ­¥éª¤ 3:构造目标坐标点
        List<ai.djl.modality.cv.output.Point> dstPoints = Arrays.asList(
                new ai.djl.modality.cv.output.Point(0, 0),
                new ai.djl.modality.cv.output.Point(img_crop_width, 0),
                new ai.djl.modality.cv.output.Point(img_crop_width, img_crop_height),
                new ai.djl.modality.cv.output.Point(0, img_crop_height)
        );
        // æ­¥éª¤ 4:透视变换
        Mat srcPoint2f = OcrNDArrayUtils.toMat(ordered);
        Mat dstPoint2f = OcrNDArrayUtils.toMat(dstPoints);
        Mat cvMat = OpenCVUtils.perspectiveTransform(srcMat, srcPoint2f, dstPoint2f);
        // æ­¥éª¤ 5:转为 DJL Image + è£å‰ª
        Image subImg = OpenCVImageFactory.getInstance().fromImage(cvMat);
        subImg = subImg.getSubImage(0, 0, img_crop_width, img_crop_height);
        // é‡Šæ”¾èµ„源
        cvMat.release();
        srcPoint2f.release();
        dstPoint2f.release();
        return subImg;
    }
    /**
     * é€è§†å˜æ¢+裁剪
     * @param srcMat
     * @param box
     * @return
     */
    public static Image transformAndCrop(Mat srcMat, OcrBox box){
        float[] pointsArr = box.toFloatArray();
        float[] lt = Arrays.copyOfRange(pointsArr, 0, 2);
        float[] rt = Arrays.copyOfRange(pointsArr, 2, 4);
        float[] rb = Arrays.copyOfRange(pointsArr, 4, 6);
        float[] lb = Arrays.copyOfRange(pointsArr, 6, 8);
        int img_crop_width = (int) Math.max(PointUtils.distance(lt, rt), PointUtils.distance(rb, lb));
        int img_crop_height = (int) Math.max(PointUtils.distance(lt, lb), PointUtils.distance(rt, rb));
        List<ai.djl.modality.cv.output.Point> srcPoints = new ArrayList<>();
        srcPoints.add(new ai.djl.modality.cv.output.Point(lt[0], lt[1]));
        srcPoints.add(new ai.djl.modality.cv.output.Point(rt[0], rt[1]));
        srcPoints.add(new ai.djl.modality.cv.output.Point(rb[0], rb[1]));
        srcPoints.add(new ai.djl.modality.cv.output.Point(lb[0], lb[1]));
        List<ai.djl.modality.cv.output.Point> dstPoints = new ArrayList<>();
        dstPoints.add(new ai.djl.modality.cv.output.Point(0, 0));
        dstPoints.add(new ai.djl.modality.cv.output.Point(img_crop_width, 0));
        dstPoints.add(new ai.djl.modality.cv.output.Point(img_crop_width, img_crop_height));
        dstPoints.add(new ai.djl.modality.cv.output.Point(0, img_crop_height));
        Mat srcPoint2f = OcrNDArrayUtils.toMat(srcPoints);
        Mat dstPoint2f = OcrNDArrayUtils.toMat(dstPoints);
        //透视变换
        Mat cvMat = OpenCVUtils.perspectiveTransform(srcMat, srcPoint2f, dstPoint2f);
        Image subImg = OpenCVImageFactory.getInstance().fromImage(cvMat);
        //ImageUtils.saveImage(subImg, i + ".png", "build/output");
        //变换后裁剪
        subImg = subImg.getSubImage(0, 0, img_crop_width, img_crop_height);
        cvMat.release();
        srcPoint2f.release();
        dstPoint2f.release();
        return subImg;
    }
    /**
     * ç»˜åˆ¶æ–‡æœ¬æ¡†
     *
     * @param mat
     * @param boxList
     */
    public static void drawRect(Mat mat, List<OcrBox> boxList) {
        for(OcrBox ocrBox : boxList){
            Imgproc.line(mat, ocrBox.getTopLeft().toCvPoint(), ocrBox.getTopRight().toCvPoint(), new Scalar(0, 255, 0), 1);
            Imgproc.line(mat, ocrBox.getTopRight().toCvPoint(), ocrBox.getBottomRight().toCvPoint(), new Scalar(0, 255, 0),1);
            Imgproc.line(mat, ocrBox.getBottomRight().toCvPoint(), ocrBox.getBottomLeft().toCvPoint(), new Scalar(0, 255, 0),1);
            Imgproc.line(mat, ocrBox.getBottomLeft().toCvPoint(), ocrBox.getTopLeft().toCvPoint(), new Scalar(0, 255, 0), 1);
        }
    }
    /**
     * ç»˜åˆ¶æ–‡æœ¬æ¡†åŠæ–‡æœ¬
     * @param image
     * @param ocrInfo
     */
    public static void drawRectWithText(BufferedImage image, OcrInfo ocrInfo,  int fontSize) {
        // å°†ç»˜åˆ¶å›¾åƒè½¬æ¢ä¸ºGraphics2D
        Graphics2D g = (Graphics2D) image.getGraphics();
        try {
            Font font = new Font("楷体", Font.PLAIN, fontSize);
            g.setFont(font);
            g.setColor(new Color(0, 0, 255));
            // å£°æ˜Žç”»ç¬”属性 ï¼šç²— ç»†ï¼ˆå•位像素)末端无修饰 æŠ˜çº¿å¤„呈尖角
            BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER);
            g.setStroke(bStroke);
            List<OcrItem> ocrItemList = ocrInfo.getOcrItemList();
            if(CollectionUtils.isNotEmpty(ocrInfo.getLineList())){
                ocrItemList = ocrInfo.flattenLines();
            }
            for(OcrItem item : ocrItemList){
                OcrBox box = item.getOcrBox();
                int[] xPoints = {
                        (int)box.getTopLeft().getX(),
                        (int)box.getTopRight().getX(),
                        (int)box.getBottomRight().getX(),
                        (int)box.getBottomLeft().getX(),
                        (int)box.getTopLeft().getX()
                };
                int[] yPoints = {
                        (int)box.getTopLeft().getY(),
                        (int)box.getTopRight().getY(),
                        (int)box.getBottomRight().getY(),
                        (int)box.getBottomLeft().getY(),
                        (int)box.getTopLeft().getY()
                };
                g.drawPolyline(xPoints, yPoints, 5);
                g.drawString(item.getText(), xPoints[0], yPoints[0]);
            }
        } finally {
            g.dispose();
        }
    }
    /**
     * ç»˜åˆ¶æ–‡æœ¬æ¡†åŠæ–‡æœ¬
     * @param srcMat
     * @param itemList
     */
    public static void drawRectWithText(Mat srcMat, List<OcrItem> itemList) {
        for(OcrItem item : itemList){
            OcrBox ocrBox = item.getOcrBox();
            Imgproc.line(srcMat, ocrBox.getTopLeft().toCvPoint(), ocrBox.getTopRight().toCvPoint(), new Scalar(0, 255, 0), 1);
            Imgproc.line(srcMat, ocrBox.getTopRight().toCvPoint(), ocrBox.getBottomRight().toCvPoint(), new Scalar(0, 255, 0),1);
            Imgproc.line(srcMat, ocrBox.getBottomRight().toCvPoint(), ocrBox.getBottomLeft().toCvPoint(), new Scalar(0, 255, 0),1);
            Imgproc.line(srcMat, ocrBox.getBottomLeft().toCvPoint(), ocrBox.getTopLeft().toCvPoint(), new Scalar(0, 255, 0), 1);
            // ä¸­æ–‡ä¹±ç 
            Imgproc.putText(srcMat, item.getAngle().getValue(), ocrBox.getTopLeft().toCvPoint(), Imgproc.FONT_HERSHEY_SCRIPT_SIMPLEX, 1.0, new Scalar(0, 255, 0), 1);
        }
    }
    public static List<PlateInfo> convertToPlateInfo(DetectedObjects detectedObjects, Image image) {
        List<PlateInfo> plateInfoList = new ArrayList<>();
        Iterator iterator = detectedObjects.items().iterator();
        int index = 0;
        while(iterator.hasNext()) {
            DetectedObjects.DetectedObject result = (DetectedObjects.DetectedObject)iterator.next();
            BoundingBox box = result.getBoundingBox();
            List<Point> keyPoints = new ArrayList<Point>();
            if(box instanceof Landmark){
                box.getBounds().getPath().forEach(point -> {
                    keyPoints.add(new Point(point.getX(), point.getY()));
                });
            }
            int x = (int)(box.getBounds().getX() * image.getWidth());
            int y = (int)(box.getBounds().getY() * image.getHeight());
            int width = (int)(box.getBounds().getWidth() * image.getWidth());
            int height = (int)(box.getBounds().getHeight() * image.getHeight());
            // ä¿®æ­£è¾¹ç•Œï¼Œé˜²æ­¢è¶Šç•Œ
            if (x < 0) x = 0;
            if (y < 0) y = 0;
            if (x + width > image.getWidth()) width = image.getWidth() - x;
            if (y + height > image.getHeight()) height = image.getHeight() - y;
            PlateInfo plateInfo = new PlateInfo();
            plateInfo.setPlateType(PlateType.fromClassName(detectedObjects.getClassNames().get(index)));
            plateInfo.setScore(detectedObjects.getProbabilities().get(index).floatValue());
            plateInfo.setDetectionRectangle(new DetectionRectangle(x, y, width, height));
            OcrBox ocrBox = new OcrBox(keyPoints.get(0), keyPoints.get(1), keyPoints.get(2), keyPoints.get(3));
            plateInfo.setBox(ocrBox);
            plateInfoList.add(plateInfo);
            index++;
        }
        return plateInfoList;
    }
    /**
     * ç»˜åˆ¶è½¦ç‰Œä¿¡æ¯
     * @param srcMat
     * @param plateInfoList
     */
    public static void drawPlateInfo(Mat srcMat, List<PlateInfo> plateInfoList) {
        for(PlateInfo plateInfo : plateInfoList){
            OcrBox ocrBox = plateInfo.getBox();
            Imgproc.line(srcMat, ocrBox.getTopLeft().toCvPoint(), ocrBox.getTopRight().toCvPoint(), new Scalar(0, 0, 255), 1);
            Imgproc.line(srcMat, ocrBox.getTopRight().toCvPoint(), ocrBox.getBottomRight().toCvPoint(), new Scalar(0, 0, 255),1);
            Imgproc.line(srcMat, ocrBox.getBottomRight().toCvPoint(), ocrBox.getBottomLeft().toCvPoint(), new Scalar(0, 0, 255),1);
            Imgproc.line(srcMat, ocrBox.getBottomLeft().toCvPoint(), ocrBox.getTopLeft().toCvPoint(), new Scalar(0, 0, 255), 1);
            // ä¸­æ–‡ä¹±ç 
            ImageUtils.putTextWithBackground(srcMat, plateInfo.getPlateNumber() + " " + plateInfo.getPlateColor(), ocrBox.getTopLeft().toCvPoint(), new Scalar(255, 255, 255), new Scalar(0, 0, 0), 1);
        }
    }
    /**
     * åœ¨å›¾åƒä¸Šç»˜åˆ¶å¸¦ç™½è‰²èƒŒæ™¯ã€é»‘色文字的文本
     */
    public static void drawPlateInfo(BufferedImage image, List<PlateInfo> plateInfoList) {
        // å°†ç»˜åˆ¶å›¾åƒè½¬æ¢ä¸ºGraphics2D
        Graphics2D graphics = (Graphics2D) image.getGraphics();
        try {
            graphics.setColor(Color.RED);// è¾¹æ¡†é¢œè‰²
            graphics.setStroke(new BasicStroke(2));   // çº¿å®½2像素
            graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING,
                    RenderingHints.VALUE_ANTIALIAS_ON); // æŠ—锯齿
            int stroke = 2;
            for(PlateInfo plateInfo : plateInfoList){
                DetectionRectangle rectangle = plateInfo.getDetectionRectangle();
                graphics.setColor(Color.RED);// è¾¹æ¡†é¢œè‰²
                //绘制车牌框
                graphics.drawRect(rectangle.getX(), rectangle.getY(), rectangle.getWidth(), rectangle.getHeight());
                graphics.setColor(Color.BLACK);// å­—体颜色
                ImageUtils.drawText(graphics, plateInfo.getPlateNumber() + " " + plateInfo.getPlateColor(), rectangle.getX(), rectangle.getY(), stroke, 4);
                OcrBox ocrBox = plateInfo.getBox();
                //绘制关键点
                graphics.setColor(Color.BLUE);
                graphics.drawRect((int)ocrBox.getTopLeft().getX(), (int)ocrBox.getTopLeft().getY(), 2, 2);
                graphics.setColor(Color.GREEN);
                graphics.drawRect((int)ocrBox.getTopRight().getX(), (int)ocrBox.getTopRight().getY(), 2, 2);
                graphics.setColor(Color.RED);
                graphics.drawRect((int)ocrBox.getBottomLeft().getX(), (int)ocrBox.getBottomLeft().getY(), 2, 2);
                graphics.setColor(Color.CYAN);
                graphics.drawRect((int)ocrBox.getBottomRight().getX(), (int)ocrBox.getBottomRight().getY(), 2, 2);
            }
        } finally {
            graphics.dispose();
        }
    }
}
src/main/java/com/xindao/ocr/swingui/config/SwingAppConfig.java
对比新文件
@@ -0,0 +1,31 @@
package com.xindao.ocr.swingui.config;
import com.xindao.ocr.swingui.swing.FileProcessorApp;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.stereotype.Component;
import javax.swing.*;
/**
 * Swing应用程序配置类,负责在Spring容器初始化完成后启动Swing界面
 */
@Component
public class SwingAppConfig implements ApplicationListener<ContextRefreshedEvent> {
    @Override
    public void onApplicationEvent(ContextRefreshedEvent event) {
        // ç¡®ä¿åœ¨Swing事件调度线程中启动UI
        SwingUtilities.invokeLater(() -> {
            try {
                // ä»ŽSpring上下文获取FileProcessorApp实例
                // ç”±äºŽ@PostConstruct注解,Spring会自动调用initialize()方法进行初始化
                FileProcessorApp fileProcessorApp = event.getApplicationContext().getBean(FileProcessorApp.class);
                System.out.println("Swing界面已通过Spring容器自动初始化");
            } catch (Exception e) {
                System.err.println("Swing界面初始化失败: " + e.getMessage());
                e.printStackTrace();
            }
        });
    }
}
src/main/java/com/xindao/ocr/swingui/constant/OcrSwingConstants.java
对比新文件
@@ -0,0 +1,26 @@
package com.xindao.ocr.swingui.constant;
import java.io.File;
/**
 * Constants used by the Swing window. All directories are located directly under the user's home
 * directory (system property {@code user.home}).
 */
public class OcrSwingConstants {
    /**
     * Cache directory for images cropped for OCR.
     */
    public static final File cacheDir = underUserHome(".paddle_ocr_images_cache");
    /**
     * Directory in which multi-region configuration files are saved.
     */
    public static final File pdfToolDir = underUserHome(".pdf_ocr_tool");
    /**
     * Directory in which the OCR runtime files are saved.
     */
    public static final File ocrDir = underUserHome(".paddle_ocr_cache");

    /**
     * Resolves a child of the user's home directory.
     */
    private static File underUserHome(String childName) {
        File home = new File(System.getProperty("user.home"));
        return new File(home, childName);
    }
}
src/main/java/com/xindao/ocr/swingui/controller/OcrController.java
对比新文件
@@ -0,0 +1,25 @@
package com.xindao.ocr.swingui.controller;
import com.xindao.ocr.swingui.dto.OcrDTO;
import com.xindao.ocr.swingui.service.OcrService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
@RestController
@RequestMapping("/ocr")
public class OcrController {
    @Autowired
    private OcrService ocrService;
    @PostMapping("/recognize")
    public String recognizeText(@RequestBody OcrDTO ocrDTO) {
        // è¿™é‡Œåº”该调用OCR模型进行文本识别
        // ç”±äºŽå…·ä½“实现依赖于OCR库,这里仅返回一个空列表作为示例
        return ocrService.ocr(ocrDTO.getImagePath());
    }
}
src/main/java/com/xindao/ocr/swingui/dto/OcrDTO.java
对比新文件
@@ -0,0 +1,10 @@
package com.xindao.ocr.swingui.dto;
import lombok.Data;
/**
 * Request payload for the OCR endpoint. Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class OcrDTO {
    /** Path of the image to run OCR on. */
    private String imagePath;
}
src/main/java/com/xindao/ocr/swingui/excel/ContractNumberExcelData.java
对比新文件
@@ -0,0 +1,18 @@
package com.xindao.ocr.swingui.excel;
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.annotation.write.style.ColumnWidth;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Row model for Excel export with a single contract-number column. Accessors and constructors
 * are generated by Lombok.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@ColumnWidth(30) // set the column width
public class ContractNumberExcelData {
    /** Contract number, written under the column header given in {@code @ExcelProperty}. */
    @ExcelProperty(value = "合同编号")
    private String contractNumber;
}
src/main/java/com/xindao/ocr/swingui/service/OcrService.java
对比新文件
@@ -0,0 +1,195 @@
package com.xindao.ocr.swingui.service;
import cn.smartjavaai.common.enums.DeviceEnum;
import com.alibaba.fastjson.JSONObject;
import com.xindao.ocr.smartjavaai.config.DirectionModelConfig;
import com.xindao.ocr.smartjavaai.config.OcrDetModelConfig;
import com.xindao.ocr.smartjavaai.config.OcrRecModelConfig;
import com.xindao.ocr.smartjavaai.config.OcrRecOptions;
import com.xindao.ocr.smartjavaai.entity.OcrInfo;
import com.xindao.ocr.smartjavaai.enums.CommonDetModelEnum;
import com.xindao.ocr.smartjavaai.enums.CommonRecModelEnum;
import com.xindao.ocr.smartjavaai.enums.DirectionModelEnum;
import com.xindao.ocr.smartjavaai.factory.OcrModelFactory;
import com.xindao.ocr.smartjavaai.model.common.detect.OcrCommonDetModel;
import com.xindao.ocr.smartjavaai.model.common.direction.OcrDirectionModel;
import com.xindao.ocr.smartjavaai.model.common.recognize.OcrCommonRecModel;
import com.xindao.ocr.swingui.constant.OcrSwingConstants;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
@Slf4j
@Service
@RequiredArgsConstructor
public class OcrService {
    public static DeviceEnum device = DeviceEnum.CPU;
    private final static String MAIN_PP_DIR = "PP_OCRv5";
    private OcrCommonRecModel recModel;
    /**
     * åˆå§‹åŒ–加载ocr模型
     */
    @PostConstruct
    private void init() {
        log.info("复制ocr文件到本地用户缓存...");
        // å¤åˆ¶PP_OCRv5目录到本地缓存
        copyPaddleCppToCache();
        try {
            recModel = getRecModel();
        } catch (IOException e) {
            log.error("加载OCR模型失败: {}", e.getMessage());
            e.printStackTrace();
        }
    }
    /**
     * å¤åˆ¶resources中的Paddle_CPP目录到本地缓存
     */
    private void copyPaddleCppToCache() {
        try {
            // åˆ›å»ºç¼“存目录 - ä½¿ç”¨ç”¨æˆ·ä¸»ç›®å½•避免权限问题
            File cacheDir = OcrSwingConstants.ocrDir;
            if (!cacheDir.exists()) {
                cacheDir.mkdirs();
            }
            // èŽ·å–resources中的Paddle_CPP目录资源
            PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
            Resource[] resources = resolver.getResources("classpath:"+MAIN_PP_DIR+"/**");
            // å¤åˆ¶æ‰€æœ‰èµ„源到缓存目录,保持目录结构
            for (Resource resource : resources) {
                String resourcePath = resource.getURL().getPath();
                // èŽ·å–ç›¸å¯¹äºŽPaddle_CPP的路径
                int startIndex = resourcePath.indexOf(MAIN_PP_DIR) + MAIN_PP_DIR.length();
                String relativePath = resourcePath.substring(startIndex);
                // å¤„理Windows路径分隔符
                relativePath = relativePath.replace('/', File.separatorChar);
                // åˆ›å»ºç›®æ ‡æ–‡ä»¶
                File destFile = new File(cacheDir, relativePath);
                // å¦‚果是目录,创建目录
                if (resource.isReadable() && resource.contentLength() == 0 && relativePath.endsWith(File.separator)) {
                    if (!destFile.exists()) {
                        destFile.mkdirs();
                        log.info("创建目录: {}", destFile.getAbsolutePath());
                    }
                } else if (resource.isReadable()) {
                    // ç¡®ä¿çˆ¶ç›®å½•存在
                    File parentDir = destFile.getParentFile();
                    if (parentDir != null && !parentDir.exists()) {
                        parentDir.mkdirs();
                    }
                    // å¤åˆ¶æ–‡ä»¶
                    Files.copy(resource.getInputStream(), destFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
                    log.info("复制资源: {} åˆ° {}", relativePath, destFile.getAbsolutePath());
                }
            }
        } catch (IOException e) {
            log.error("复制PP_OCRv5目录到缓存失败: {}", e.getMessage());
            e.printStackTrace();
        }
    }
    /**
     * èŽ·å– resources ä¸‹æ¨¡åž‹æ–‡ä»¶çš„绝对路径
     *
     * @param relativePath ç›¸å¯¹äºŽ resources çš„路径
     */
    private String getModelPath(String relativePath) throws IOException {
        String localPath = null;
        try{
            new ClassPathResource(relativePath).getFile().getAbsolutePath();
        }catch (IOException e){
            // å¦‚果找不到文件,则尝试从本地缓存目录获取
            localPath = getLocalPath(relativePath);
            File modelFile = new File(localPath);
            if (!modelFile.exists()) {
                throw new IOException("模型文件不存在: " + localPath);
            }
        } finally {
            log.info("OCR模型文件路径: {}", localPath);
        }
        return localPath;
    }
    /**
     * èŽ·å– resources ä¸‹æ¨¡åž‹æ–‡ä»¶çš„用户本地路径
     *
     * @param relativePath ç›¸å¯¹äºŽ resources çš„路径
     */
    private String getLocalPath(String relativePath) {
        return new File(OcrSwingConstants.ocrDir,File.separator + relativePath).getAbsolutePath();
    }
    public OcrCommonRecModel getRecModel() throws IOException {
        OcrRecModelConfig recModelConfig = new OcrRecModelConfig();
        recModelConfig.setRecModelEnum(CommonRecModelEnum.PP_OCR_V5_MOBILE_REC_MODEL);
        recModelConfig.setRecModelPath(
                getModelPath("PP-OCRv5_server_rec_infer/PP-OCRv5_server_rec.onnx")
        );
        recModelConfig.setDevice(device);
        recModelConfig.setTextDetModel(getDetectionModel());
        return OcrModelFactory.getInstance().getRecModel(recModelConfig);
    }
    public OcrCommonDetModel getDetectionModel() throws IOException {
        OcrDetModelConfig config = new OcrDetModelConfig();
        config.setModelEnum(CommonDetModelEnum.PP_OCR_V5_MOBILE_DET_MODEL);
        config.setDetModelPath(
                getModelPath("PP-OCRv5_server_det_infer/PP-OCRv5_server_det.onnx")
        );
        config.setDevice(device);
        return OcrModelFactory.getInstance().getDetModel(config);
    }
    public OcrDirectionModel getDirectionModel() throws IOException {
        DirectionModelConfig directionModelConfig = new DirectionModelConfig();
        directionModelConfig.setModelEnum(DirectionModelEnum.PP_LCNET_X0_25);
        directionModelConfig.setModelPath(
                getModelPath("PP-LCNet_x0_25_textline_ori_infer/PP-LCNet_x0_25_textline_ori_infer.onnx")
        );
        directionModelConfig.setDevice(device);
        return OcrModelFactory.getInstance().getDirectionModel(directionModelConfig);
    }
    public OcrCommonRecModel getRecModelWithDirection() throws IOException {
        OcrRecModelConfig recModelConfig = new OcrRecModelConfig();
        recModelConfig.setRecModelEnum(CommonRecModelEnum.PP_OCR_V5_MOBILE_REC_MODEL);
        recModelConfig.setRecModelPath(
                getModelPath("PP-OCRv5_mobile_rec_infer/PP-OCRv5_mobile_rec_infer.onnx")
        );
        recModelConfig.setDevice(device);
        recModelConfig.setTextDetModel(getDetectionModel());
        recModelConfig.setDirectionModel(getDirectionModel());
        return OcrModelFactory.getInstance().getRecModel(recModelConfig);
    }
    public String ocr(String url) {
        String fullText = null;
        try {
            OcrRecOptions options = new OcrRecOptions(false, true);
            OcrInfo ocrInfo = recModel.recognize(url, options);
            log.info("OCR识别结果:{}", JSONObject.toJSONString(ocrInfo));
            fullText = ocrInfo.getFullText();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return fullText;
    }
}
src/main/java/com/xindao/ocr/swingui/swing/FileProcessorApp.java
对比新文件
@@ -0,0 +1,100 @@
package com.xindao.ocr.swingui.swing;
import com.xindao.ocr.swingui.service.OcrService;
import com.xindao.ocr.swingui.swing.jpanel.ContractNumberProcessPanel;
import com.xindao.ocr.swingui.swing.jpanel.MultipleAreaProcessPanel;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.config.ConfigurableBeanFactory;
import org.springframework.context.annotation.Scope;
import javax.annotation.PostConstruct;
import javax.swing.*;
import javax.swing.border.EmptyBorder;
import java.awt.*;
@org.springframework.stereotype.Component
@Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE) // è®¾ç½®ä¸ºåŽŸåž‹ä½œç”¨åŸŸï¼Œæ¯æ¬¡èŽ·å–åˆ›å»ºæ–°å®žä¾‹
public class FileProcessorApp extends JFrame {
    @Autowired
    private OcrService ocrService;
    private static final Font DEFAULT_FONT;
    // é¢œè‰²å®šä¹‰
    private static final Color BACKGROUND_COLOR = new Color(245, 245, 247);
    private static final Color TEXT_COLOR = new Color(51, 51, 51);
    private static final Color PRIMARY_COLOR = new Color(66, 133, 244);
    private static final Color TEXT_LIGHT = new Color(102, 102, 102);
    static {
        // å­—体设置
        if (isFontAvailable("Microsoft YaHei")) {
            DEFAULT_FONT = new Font("Microsoft YaHei", Font.PLAIN, 12);
        } else if (isFontAvailable("SimHei")) {
            DEFAULT_FONT = new Font("SimHei", Font.PLAIN, 12);
        } else if (isFontAvailable("WenQuanYi Micro Hei")) {
            DEFAULT_FONT = new Font("WenQuanYi Micro Hei", Font.PLAIN, 12);
        } else {
            DEFAULT_FONT = new Font(Font.SANS_SERIF, Font.PLAIN, 12);
        }
    }
    private static boolean isFontAvailable(String fontName) {
        GraphicsEnvironment ge = GraphicsEnvironment.getLocalGraphicsEnvironment();
        String[] fontNames = ge.getAvailableFontFamilyNames();
        for (String name : fontNames) {
            if (name.equals(fontName)) {
                return true;
            }
        }
        return false;
    }
    public FileProcessorApp() {
        setTitle("OCR图像处理工具");
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        setSize(850, 700);
        setLocationRelativeTo(null);
        setResizable(true);
        // è®¾ç½®å…¨å±€å­—体和背景
        setFont(DEFAULT_FONT);
        getContentPane().setBackground(BACKGROUND_COLOR);
    }
    /**
     * ä½¿ç”¨@PostConstruct确保依赖注入完成后再初始化UI
     * é¿å…æž„造函数中直接使用@Autowired的字段导致null值
     */
    @PostConstruct
    public void initialize() {
        initUI();
        setVisible(true);
    }
    private void initUI() {
        // ä¸»é¢æ¿ä½¿ç”¨è¾¹ç•Œå¸ƒå±€
        JPanel mainPanel = new JPanel(new BorderLayout(15, 15));
        mainPanel.setBorder(new EmptyBorder(15, 15, 15, 15));
        mainPanel.setBackground(BACKGROUND_COLOR);
        // åˆ›å»ºæ ‡ç­¾é¡µé¢æ¿
        JTabbedPane tabbedPane = new JTabbedPane();
        tabbedPane.setFont(DEFAULT_FONT);
        JPanel mainTab = new ContractNumberProcessPanel(BACKGROUND_COLOR, PRIMARY_COLOR, TEXT_COLOR,TEXT_LIGHT,DEFAULT_FONT,this,ocrService).initPanel();
        JPanel extensionPanel = new MultipleAreaProcessPanel(this,ocrService,BACKGROUND_COLOR, PRIMARY_COLOR, TEXT_COLOR, DEFAULT_FONT).initPanel();
        // æ·»åŠ æ ‡ç­¾é¡µåˆ°æ ‡ç­¾é¢æ¿
        tabbedPane.addTab("文件处理", null, mainTab, "");
        tabbedPane.addTab("多区域识别", null, extensionPanel, "");
        // æ·»åŠ æ ‡ç­¾é¢æ¿åˆ°ä¸»é¢æ¿
        mainPanel.add(tabbedPane, BorderLayout.CENTER);
        // è®¾ç½®å†…容面板
        setContentPane(mainPanel);
    }
}
src/main/java/com/xindao/ocr/swingui/swing/jpanel/ContractNumberProcessPanel.java
对比新文件
@@ -0,0 +1,645 @@
package com.xindao.ocr.swingui.swing.jpanel;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.xindao.ocr.swingui.constant.OcrSwingConstants;
import com.xindao.ocr.swingui.excel.ContractNumberExcelData;
import com.xindao.ocr.swingui.service.OcrService;
import com.xindao.ocr.swingui.swing.FileProcessorApp;
import com.xindao.ocr.swingui.swing.utils.FileNameValidator;
import com.xindao.ocr.swingui.swing.utils.GenerateCustomizeComponent;
import com.xindao.ocr.swingui.swing.utils.ToFile;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.util.IOUtils;
import javax.swing.*;
import javax.swing.border.CompoundBorder;
import javax.swing.border.EmptyBorder;
import javax.swing.border.LineBorder;
import javax.swing.filechooser.FileFilter;
import java.awt.*;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.prefs.Preferences;
/**
 * åˆåŒç¼–号处理面板
 */
public class ContractNumberProcessPanel {
    // OCR service used to read the cropped PDF area
    private final OcrService ocrService;
    // Files currently chosen for processing
    private final List<File> selectedFiles = new ArrayList<>();
    // Persistent user preferences (last paths and last PDF area)
    private final Preferences prefs;
    private final Font DEFAULT_FONT;
    private JTextArea logArea;
    private JLabel filesLabel;
    private JLabel outputDirLabel;
    private JLabel lastSelectionLabel; // label that shows the last selected PDF area
    private File outputDirectory;
    private final Color BACKGROUND_COLOR;
    private final Color PRIMARY_COLOR;
    private final Color TEXT_COLOR;
    private final Color TEXT_LIGHT;
    // Preference keys for the PDF area selection
    private static final String PREF_PDF_PAGE = "lastPdfPage";
    private static final String PREF_PDF_X = "lastPdfX";
    private static final String PREF_PDF_Y = "lastPdfY";
    private static final String PREF_PDF_WIDTH = "lastPdfWidth";
    private static final String PREF_PDF_HEIGHT = "lastPdfHeight";
    // Owning frame, used as parent for dialogs
    private final FileProcessorApp supper;
    // Running suffix appended to duplicate contract numbers
    AtomicInteger fileIndex = new AtomicInteger(1);
    public ContractNumberProcessPanel(
            Color BACKGROUND_COLOR,
            Color PRIMARY_COLOR,
            Color TEXT_COLOR,
            Color TEXT_LIGHT,
            Font font,
            FileProcessorApp supper,
            OcrService ocrService){
        this.BACKGROUND_COLOR = BACKGROUND_COLOR;
        this.PRIMARY_COLOR = PRIMARY_COLOR;
        this.TEXT_COLOR = TEXT_COLOR;
        this.TEXT_LIGHT = TEXT_LIGHT;
        this.DEFAULT_FONT = font;
        this.supper = supper;
        this.ocrService = ocrService;
        this.prefs = Preferences.userNodeForPackage(ContractNumberProcessPanel.class);
    }
    /**
     * Builds the contract-number tab: a top card with the file, output-directory and PDF-area
     * controls plus the process button, and a bottom card with the log area.
     * Also wires the button listeners and restores the last used output directory and PDF area info.
     *
     * @return the fully assembled tab panel
     */
    public JPanel initPanel() {
        // Initialize the panel
        // First tab: file processing and log (merged into one tab)
        JPanel mainTab = new JPanel(new BorderLayout(15, 15));
        mainTab.setBorder(new EmptyBorder(15, 15, 15, 15));
        mainTab.setBackground(BACKGROUND_COLOR);
        // Top card: file selection area
        JPanel topCard = GenerateCustomizeComponent.createCardPanel();
        topCard.setLayout(new BoxLayout(topCard, BoxLayout.Y_AXIS));
        topCard.setBorder(new EmptyBorder(20, 20, 20, 20));
        // Title, centered
        JPanel titlePanel = new JPanel(new GridBagLayout());
        titlePanel.setOpaque(false);
        JLabel titleLabel = new JLabel("合同编号识别");
        titleLabel.setFont(new Font(DEFAULT_FONT.getName(), Font.BOLD, 18));
        titleLabel.setForeground(PRIMARY_COLOR);
        titleLabel.setBorder(new EmptyBorder(0, 0, 15, 0));
        titlePanel.add(titleLabel);
        topCard.add(titlePanel);
        topCard.add(Box.createVerticalStrut(10));
        // File selection area
        JPanel fileSelectionPanel = GenerateCustomizeComponent.createStyledPanel(new FlowLayout(FlowLayout.LEFT, 10, 10));
        JButton selectFilesBtn = GenerateCustomizeComponent.createStyledButton("选择文件...",DEFAULT_FONT);
        filesLabel = new JLabel("未选择文件");
        filesLabel.setFont(DEFAULT_FONT);
        filesLabel.setForeground(TEXT_COLOR);
        fileSelectionPanel.add(selectFilesBtn);
        fileSelectionPanel.add(filesLabel);
        // Output directory selection area
        JPanel outputDirPanel = GenerateCustomizeComponent.createStyledPanel(new FlowLayout(FlowLayout.LEFT, 10, 10));
        JButton selectOutputDirBtn = GenerateCustomizeComponent.createStyledButton("选择输出目录...",DEFAULT_FONT);
        outputDirLabel = new JLabel("未选择输出目录");
        outputDirLabel.setFont(DEFAULT_FONT);
        outputDirLabel.setForeground(TEXT_COLOR);
        outputDirPanel.add(selectOutputDirBtn);
        outputDirPanel.add(outputDirLabel);
        // PDF area selection button and last-selection info, shown on one row
        JPanel pdfSelectionPanel = GenerateCustomizeComponent.createStyledPanel(new FlowLayout(FlowLayout.LEFT, 10, 10));
        JButton selectPdfAreaBtn = GenerateCustomizeComponent.createStyledButton("选择PDF区域...",DEFAULT_FONT);
        pdfSelectionPanel.add(selectPdfAreaBtn);
        // Last selected PDF area info, kept in a member field so it can be refreshed later
        lastSelectionLabel = new JLabel("上次选择: æ— ");
        lastSelectionLabel.setFont(DEFAULT_FONT);
        lastSelectionLabel.setForeground(TEXT_LIGHT);
        pdfSelectionPanel.add(lastSelectionLabel);
        // Process button
        JPanel processBtnPanel = GenerateCustomizeComponent.createStyledPanel(new FlowLayout(FlowLayout.CENTER, 0, 15));
        JButton processBtn = GenerateCustomizeComponent.createPrimaryButton("处理文件",DEFAULT_FONT);
        processBtnPanel.add(processBtn);
        // Add the rows to the top card
        topCard.add(fileSelectionPanel);
        topCard.add(Box.createVerticalStrut(10));
        topCard.add(outputDirPanel);
        topCard.add(Box.createVerticalStrut(10));
        topCard.add(pdfSelectionPanel);
        topCard.add(Box.createVerticalStrut(15));
        topCard.add(processBtnPanel);
        // Bottom card: log area
        JPanel bottomCard = GenerateCustomizeComponent.createCardPanel();
        bottomCard.setLayout(new BorderLayout());
        bottomCard.setBorder(new EmptyBorder(15, 15, 15, 15));
        JLabel logTitleLabel = new JLabel("处理日志");
        logTitleLabel.setFont(new Font(DEFAULT_FONT.getName(), Font.BOLD, 14));
        logTitleLabel.setForeground(TEXT_COLOR);
        logTitleLabel.setBorder(new EmptyBorder(0, 0, 10, 0));
        logArea = new JTextArea();
        logArea.setEditable(false);
        logArea.setLineWrap(true);
        logArea.setSize(-1,100);
        logArea.setFont(DEFAULT_FONT);
        logArea.setBackground(new Color(250, 250, 250));
        logArea.setBorder(new CompoundBorder(
                new LineBorder(new Color(220, 220, 220)),
                new EmptyBorder(5, 5, 5, 5)
        ));
        JScrollPane scrollPane = new JScrollPane(logArea);
        scrollPane.setBorder(null);
        scrollPane.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);
        bottomCard.add(logTitleLabel, BorderLayout.NORTH);
        bottomCard.add(scrollPane, BorderLayout.CENTER);
        // Add the top and bottom cards to the main tab
        mainTab.add(topCard, BorderLayout.NORTH);
        mainTab.add(bottomCard, BorderLayout.CENTER);
        // Register the event listeners
        selectFilesBtn.addActionListener(e -> selectFiles());
        selectOutputDirBtn.addActionListener(e -> selectOutputDirectory());
        processBtn.addActionListener(e -> processFiles());
        selectPdfAreaBtn.addActionListener(e -> selectPdfArea());
        // Restore state from the preferences; both calls need the labels and log area created above
        loadLastPaths();
        showLastPdfSelectionInfo();
        return mainTab;
    }
    private void selectFiles() {
        JFileChooser fileChooser = new JFileChooser();
        setComponentFont(fileChooser, DEFAULT_FONT);
        String lastFilePath = prefs.get("lastFilepath", "");
        if (!lastFilePath.isEmpty()) {
            File lastFile = new File(lastFilePath);
            if (lastFile.exists()) {
                fileChooser.setCurrentDirectory(lastFile.getParentFile());
            }
        }
        fileChooser.setMultiSelectionEnabled(true);
        fileChooser.setDialogTitle("选择要处理的文件");
        styleFileChooser(fileChooser);
        int result = fileChooser.showOpenDialog(supper);
        if (result == JFileChooser.APPROVE_OPTION) {
            selectedFiles.clear();
            File[] files = fileChooser.getSelectedFiles();
            selectedFiles.addAll(Arrays.asList(files));
            filesLabel.setText("已选择 " + selectedFiles.size() + " ä¸ªæ–‡ä»¶");
            log("已选择 " + selectedFiles.size() + " ä¸ªæ–‡ä»¶");
            if (files.length > 0) {
                prefs.put("lastFilepath", files[0].getAbsolutePath());
            }
        }
    }
    private void setComponentFont(Component component, Font font) {
        component.setFont(font);
        if (component instanceof Container) {
            for (Component child : ((Container) component).getComponents()) {
                setComponentFont(child, font);
            }
        }
    }
    // ç¾ŽåŒ–文件选择器
    private void styleFileChooser(JFileChooser chooser) {
        chooser.setBackground(BACKGROUND_COLOR);
        chooser.setForeground(TEXT_COLOR);
        // è®¾ç½®æŒ‰é’®æ ·å¼
        for (Component comp : chooser.getComponents()) {
            if (comp instanceof JButton) {
                JButton btn = (JButton) comp;
                btn.setFont(DEFAULT_FONT);
                btn.setBorder(new EmptyBorder(5, 10, 5, 10));
                btn.setFocusPainted(false);
            }
            setComponentFont(comp, DEFAULT_FONT);
        }
    }
    private void selectOutputDirectory() {
        JFileChooser dirChooser = new JFileChooser();
        setComponentFont(dirChooser, DEFAULT_FONT);
        String lastDirPath = prefs.get("lastOutputDir", "");
        if (!lastDirPath.isEmpty()) {
            File lastDir = new File(lastDirPath);
            if (lastDir.exists() && lastDir.isDirectory()) {
                dirChooser.setCurrentDirectory(lastDir);
            }
        }
        dirChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
        dirChooser.setDialogTitle("选择输出目录");
        styleFileChooser(dirChooser);
        int result = dirChooser.showOpenDialog(supper);
        if (result == JFileChooser.APPROVE_OPTION) {
            outputDirectory = dirChooser.getSelectedFile();
            outputDirLabel.setText(outputDirectory.getAbsolutePath());
            log("已选择输出目录: " + outputDirectory.getAbsolutePath());
            prefs.put("lastOutputDir", outputDirectory.getAbsolutePath());
        }
    }
    /**
     * å¤„理合同编号方法
     */
    private void processFiles() {
        if (selectedFiles.isEmpty()) {
            JOptionPane.showMessageDialog(supper, "请先选择要处理的文件", "提示", JOptionPane.WARNING_MESSAGE);
            return;
        }
        if (outputDirectory == null || !outputDirectory.exists()) {
            JOptionPane.showMessageDialog(supper, "请先选择有效的输出目录", "提示", JOptionPane.WARNING_MESSAGE);
            return;
        }
        log("开始处理文件...");
        //识别到的合同编号列表
        final List<ContractNumberExcelData> contractNumberList = new CopyOnWriteArrayList<>();
        new Thread(() -> {
            int successCount = 0;
            int failCount = 0;
            int processCount = 0;
            for (File file : selectedFiles) {
                processCount++;
                try {
                    //截取pdf选区图像
                    String pathStr = capturePdfArea(file, prefs);
//                    ToFile.preprocessImage(pathStr);
                    //读取图像内容
                    String ocrFullText = FileNameValidator.validateAndCleanFileName(ocrService.ocr(pathStr.replaceFirst("/", "")));
                    //获取识别到的第一个内容
                    String text = file.getName().replace(".pdf","");
                    if(StringUtils.isNotBlank(ocrFullText) && !StringUtils.equals(ocrFullText,text)){
                        text = ocrFullText;
                        String finalText = text;
                        //如果合同编号重复,则在文件名后加一个序号
                        if(contractNumberList.stream().anyMatch(f -> f.getContractNumber().equals(finalText))){
                            text+="("+ fileIndex.get() +")";
                            fileIndex.getAndIncrement();
                        }
                        //将识别的内容设置为文件名,导出到指定目录
                        String outputFileName = text + ".pdf";
                        File outputFile = new File(outputDirectory, outputFileName);
                        if (!outputFile.getParentFile().exists()) {
                            outputFile.getParentFile().mkdirs();
                        }
                        IOUtils.copy(Files.newInputStream(file.toPath()),outputFile);
                    }
                    successCount++;
                    contractNumberList.add(new ContractNumberExcelData(text));
                    log("处理成功("+processCount+"/"+selectedFiles.size()+"): " + file.getName());
                } catch (Exception e) {
                    failCount++;
                    e.printStackTrace();
                    log("处理失败: " + file.getName() + " - " + e.getMessage());
                }finally {
                    //删除临时目录
                    ToFile.deleteTempFiles(OcrSwingConstants.cacheDir);
                }
            }
            //导出识别到的合同编号列表
            try {
                String outputExcelFileName = "合同编号列表_" + LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd")) + ExcelTypeEnum.XLSX.getValue();
                File outputExcelFile = new File(outputDirectory, outputExcelFileName);
                if (!outputExcelFile.getParentFile().exists()) {
                    outputExcelFile.getParentFile().mkdirs();
                }
                EasyExcel.write(outputExcelFile, ContractNumberExcelData.class).sheet().doWrite(contractNumberList);
                log("文件已导出到: " + outputExcelFile.getAbsolutePath());
            } catch (Exception e) {
                log("导出合同编号列表失败: " + e.getMessage());
            }
            log("处理完成 - æˆåŠŸ: " + successCount + ", å¤±è´¥: " + failCount);
            int finalSuccessCount = successCount;
            int finalFailCount = failCount;
            SwingUtilities.invokeLater(() ->
                    JOptionPane.showMessageDialog(supper,
                            "处理完成\n成功: " + finalSuccessCount + "\n失败: " + finalFailCount,
                            "处理结果", JOptionPane.INFORMATION_MESSAGE)
            );
        }).start();
    }
    // åŠ è½½ä¸Šæ¬¡çš„PDF区域选择信息
    private Map<String, Object> loadLastPdfSelectionInfo() {
        Map<String, Object> info = new HashMap<>();
        int pageNumber = prefs.getInt(PREF_PDF_PAGE, 0);
        float x = prefs.getFloat(PREF_PDF_X, 0);
        float y = prefs.getFloat(PREF_PDF_Y, 0);
        float width = prefs.getFloat(PREF_PDF_WIDTH, 0);
        float height = prefs.getFloat(PREF_PDF_HEIGHT, 0);
        info.put("page", pageNumber);
        info.put("x", x);
        info.put("y", y);
        info.put("width", width);
        info.put("height", height);
        return info;
    }
    // PDF区域选择功能
    private void selectPdfArea() {
        // æ£€æŸ¥æ˜¯å¦æœ‰ä¸Šæ¬¡é€‰æ‹©çš„PDF信息
        Map<String, Object> lastSelection = loadLastPdfSelectionInfo();
        File pdfFile = null;
        int defaultPage = 0;
        boolean hasReSelection = true;
        // å¦‚果有上次选择的信息,询问用户是否使用
        if (!lastSelection.isEmpty()) {
            int option = JOptionPane.showConfirmDialog(supper,
                    "是否使用上次选择的区域?",
                    "上次选择", JOptionPane.YES_NO_OPTION);
            if (option == JOptionPane.YES_OPTION) {
                hasReSelection = false; // ç”¨æˆ·é€‰æ‹©ä½¿ç”¨ä¸Šæ¬¡çš„æ–‡ä»¶
            }
        }
        // å¦‚果没有上次选择的文件或用户不使用上次的文件,则让用户选择新文件
        if (hasReSelection) {
            JFileChooser fileChooser = new JFileChooser();
            setComponentFont(fileChooser, DEFAULT_FONT);
            styleFileChooser(fileChooser);
            // è¿‡æ»¤åªæ˜¾ç¤ºPDF文件
            fileChooser.setFileFilter(new FileFilter() {
                @Override
                public boolean accept(File f) {
                    return f.isDirectory() || f.getName().toLowerCase().endsWith(".pdf");
                }
                @Override
                public String getDescription() {
                    return "PDF文件 (*.pdf)";
                }
            });
            fileChooser.setDialogTitle("选择PDF文件");
            int result = fileChooser.showOpenDialog(supper);
            if (result != JFileChooser.APPROVE_OPTION) {
                return;
            }
            pdfFile = fileChooser.getSelectedFile();
            if (!pdfFile.getName().toLowerCase().endsWith(".pdf")) {
                JOptionPane.showMessageDialog(supper, "请选择PDF文件", "提示", JOptionPane.WARNING_MESSAGE);
                return;
            }
            // åŠ è½½PDF并显示选择面板
            try (PDDocument document = PDDocument.load(Files.newInputStream(pdfFile.toPath()))) {
                int totalPages = document.getNumberOfPages();
                // é»˜è®¤ç¬¬ä¸€é¡µ
                // è®©ç”¨æˆ·è¾“入页码,默认使用上次的页码
                String pageStr = JOptionPane.showInputDialog(supper,
                        "请输入要选择区域的页码(共"+totalPages+"页):",
                        "输入页码",
                        JOptionPane.PLAIN_MESSAGE,
                        null,
                        null,
                        String.valueOf(defaultPage + 1)).toString();
                if (pageStr == null || pageStr.trim().isEmpty()) {
                    return;
                }
                int pageNumber;
                try {
                    pageNumber = Integer.parseInt(pageStr.trim()) - 1; // PDFBox页码从0开始
                } catch (NumberFormatException e) {
                    JOptionPane.showMessageDialog(supper, "请输入有效的页码", "错误", JOptionPane.ERROR_MESSAGE);
                    return;
                }
                if (pageNumber < 0 || pageNumber >= totalPages) {
                    JOptionPane.showMessageDialog(supper, "页码超出范围", "错误", JOptionPane.ERROR_MESSAGE);
                    return;
                }
                // èŽ·å–PDF页面尺寸
                float pdfWidth = document.getPage(pageNumber).getMediaBox().getWidth();
                float pdfHeight = document.getPage(pageNumber).getMediaBox().getHeight();
                // èŽ·å–ä¸Šæ¬¡é€‰æ‹©çš„åŒºåŸŸï¼ˆå¦‚æžœå­˜åœ¨ä¸”æ˜¯å½“å‰æ–‡ä»¶å’Œé¡µé¢ï¼‰
                Rectangle2D lastArea = null;
                if (!lastSelection.isEmpty() && pageNumber == (int)lastSelection.get("page")) {
                    float x = (float)lastSelection.get("x");
                    float y = (float)lastSelection.get("y");
                    float width = (float)lastSelection.get("width");
                    float height = (float)lastSelection.get("height");
                    // è½¬æ¢ä¸ºPDF坐标系统的区域(未缩放的)
                    lastArea = new Rectangle2D.Float(x, y, width, height);
                }
                // åˆ›å»ºPDF预览和区域选择对话框
                JDialog pdfDialog = new JDialog(supper, "选择PDF识别区域 - " + pdfFile.getName(), true);
                pdfDialog.setSize(639, 850);
                pdfDialog.setLocationRelativeTo(supper);
                // åˆ›å»ºPDF预览面板,传入上次选择的区域
                PdfPreviewPanel previewPanel = new PdfPreviewPanel(document, pageNumber, lastArea);
                // åˆ›å»ºå¯æ»šåŠ¨çš„PDF预览面板
                JScrollPane scrollablePreview = new JScrollPane(previewPanel);
                scrollablePreview.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
                scrollablePreview.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED);
                scrollablePreview.setPreferredSize(new Dimension(599, 750));
                scrollablePreview.getVerticalScrollBar().setUnitIncrement(16); // è®¾ç½®æ»šåŠ¨é€Ÿåº¦
                scrollablePreview.setBorder(BorderFactory.createEtchedBorder());
                pdfDialog.add(scrollablePreview, BorderLayout.CENTER);
                // åˆ›å»ºåº•部按钮面板
                JPanel buttonPanel = new JPanel();
                JButton confirmBtn = GenerateCustomizeComponent.createPrimaryButton("确认选择",DEFAULT_FONT);
                JButton cancelBtn = GenerateCustomizeComponent.createStyledButton("取消",DEFAULT_FONT);
                confirmBtn.addActionListener(e -> {
                    Rectangle2D selection = previewPanel.getSelection();
                    if (selection != null) {
                        // è½¬æ¢ä¸ºPDF坐标(考虑缩放和平移)
                        Point translation = previewPanel.getTranslation();
                        float scale = previewPanel.getScale();
                        // è®¡ç®—选择区域在PDF文档中的实际坐标
                        float pdfX = (float)((selection.getX() - translation.x) / scale);
                        float pdfY = (float)((selection.getY() - translation.y) / scale);
                        float pdfWidthSel = (float)(selection.getWidth() / scale);
                        float pdfHeightSel = (float)(selection.getHeight() / scale);
                        // ä¿å­˜é€‰æ‹©ä¿¡æ¯
                        savePdfSelectionInfo(pageNumber, pdfX, pdfY, pdfWidthSel, pdfHeightSel);
                        String coordsInfo = String.format(
                                "新的区域坐标: X: %.2f, Y: %.2f, W: %.2f, H: %.2f",
                                pdfX, pdfY, pdfWidthSel, pdfHeightSel
                        );
                        log(coordsInfo);
//                    JOptionPane.showMessageDialog(pdfDialog, "确认选择该区域?", "区域坐标", JOptionPane.INFORMATION_MESSAGE);
                    } else {
                        JOptionPane.showMessageDialog(pdfDialog, "请先选择区域", "提示", JOptionPane.WARNING_MESSAGE);
                        return;
                    }
                    pdfDialog.dispose();
                });
                cancelBtn.addActionListener(e -> pdfDialog.dispose());
                buttonPanel.add(confirmBtn);
                buttonPanel.add(cancelBtn);
                pdfDialog.add(buttonPanel, BorderLayout.SOUTH);
                pdfDialog.setVisible(true);
            } catch (Exception e) {
                log("加载PDF失败: " + e.getMessage());
                JOptionPane.showMessageDialog(supper, "加载PDF失败: " + e.getMessage(), "错误", JOptionPane.ERROR_MESSAGE);
            }
        }
    }
    // ä¿å­˜PDF区域选择信息到偏好设置
    private void savePdfSelectionInfo(int pageNumber, float x, float y, float width, float height) {
//        prefs.put(PREF_PDF_PATH, pdfFile.getAbsolutePath());
        prefs.putInt(PREF_PDF_PAGE, pageNumber);
        prefs.putFloat(PREF_PDF_X, x);
        prefs.putFloat(PREF_PDF_Y, y);
        prefs.putFloat(PREF_PDF_WIDTH, width);
        prefs.putFloat(PREF_PDF_HEIGHT, height);
        // æ›´æ–°ç•Œé¢æ˜¾ç¤º
        showLastPdfSelectionInfo();
    }
    private void loadLastPaths() {
        String lastDirPath = prefs.get("lastOutputDir", "");
        if (!lastDirPath.isEmpty()) {
            File lastDir = new File(lastDirPath);
            if (lastDir.exists() && lastDir.isDirectory()) {
                outputDirectory = lastDir;
                outputDirLabel.setText(outputDirectory.getAbsolutePath());
                log("已加载上次使用的输出目录: " + outputDirectory.getAbsolutePath());
            }
        }
    }
    // æ˜¾ç¤ºä¸Šæ¬¡é€‰æ‹©çš„PDF区域信息 - ä¿®å¤äº†ç±»åž‹è½¬æ¢é—®é¢˜
    private void showLastPdfSelectionInfo() {
        float x = prefs.getFloat(PREF_PDF_X, 0);
        float y = prefs.getFloat(PREF_PDF_Y, 0);
        float width = prefs.getFloat(PREF_PDF_WIDTH, 0);
        float height = prefs.getFloat(PREF_PDF_HEIGHT, 0);
        String info = String.format("上次选择区域:  - X: %.2f, Y: %.2f, W: %.2f, H: %.2f",
                  x, y, width, height);
        // ç›´æŽ¥ä½¿ç”¨æˆå‘˜å˜é‡æ›´æ–°ä¸Šæ¬¡é€‰æ‹©ä¿¡æ¯ï¼Œé¿å…ç»„件查找
        lastSelectionLabel.setText(info);
        log(info);
    }
    private void log(final String message) {
        SwingUtilities.invokeLater(() -> {
            String timestamp = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
            logArea.append("["+timestamp+"] "+message + "\n");
            logArea.setCaretPosition(logArea.getDocument().getLength());
        });
    }
    /**
     * æˆªå–pdf文件指定区域的图像
     * @param pdfFile pdf文件
     * @param prefs é€‰åŒºä¿¡æ¯
     */
    private String capturePdfArea(File pdfFile, Preferences prefs) throws IOException {
        try (PDDocument document = PDDocument.load(Files.newInputStream(pdfFile.toPath()))) {
            int page = prefs.getInt(PREF_PDF_PAGE, 0); // è½¬æ¢ä¸ºç”¨æˆ·å‹å¥½çš„页码(从1开始)
            float x = prefs.getFloat(PREF_PDF_X, 0);
            float y = prefs.getFloat(PREF_PDF_Y, 0);
            float width = prefs.getFloat(PREF_PDF_WIDTH, 0);
            float height = prefs.getFloat(PREF_PDF_HEIGHT, 0);
            if (page < 0 || page > document.getNumberOfPages()) {
                throw new IllegalArgumentException("页码超出范围: " + page);
            }
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            BufferedImage pageImage = pdfRenderer.renderImage(page);
            document.close();
            BufferedImage croppedImage = cropImage(pageImage, (int) x, (int) y, (int) width, (int) height);
            //保存图片
            File cacheDir = OcrSwingConstants.cacheDir;
            String outputFilePath =cacheDir.getAbsolutePath() + File.separator + UUID.randomUUID() + ".png";
            boolean saved = ToFile.saveImage(croppedImage, outputFilePath, "png");
            if(saved){
                return outputFilePath;
            }
            return "";
        }
    }
    /**
     * è£å‰ªå›¾åƒæŒ‡å®šåŒºåŸŸ
     * @param originalImage åŽŸå§‹å›¾åƒ
     * @param x å·¦ä¸Šè§’ x åæ ‡
     * @param y å·¦ä¸Šè§’ y åæ ‡
     * @param width è£å‰ªå®½åº¦
     * @param height è£å‰ªé«˜åº¦
     * @return è£å‰ªåŽçš„图像
     */
    private static BufferedImage cropImage(BufferedImage originalImage, int x, int y, int width, int height) {
        return originalImage.getSubimage(x, y, width, height);
    }
}
src/main/java/com/xindao/ocr/swingui/swing/jpanel/MultipleAreaProcessPanel.java
对比新文件
@@ -0,0 +1,1166 @@
package com.xindao.ocr.swingui.swing.jpanel;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.alibaba.excel.write.style.column.LongestMatchColumnWidthStyleStrategy;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.xindao.ocr.swingui.constant.OcrSwingConstants;
import com.xindao.ocr.swingui.service.OcrService;
import com.xindao.ocr.swingui.swing.FileProcessorApp;
import com.xindao.ocr.swingui.swing.utils.FileNameValidator;
import com.xindao.ocr.swingui.swing.utils.GenerateCustomizeComponent;
import com.xindao.ocr.swingui.swing.utils.ToFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.swing.*;
import javax.swing.border.CompoundBorder;
import javax.swing.border.EmptyBorder;
import javax.swing.border.LineBorder;
import javax.swing.filechooser.FileNameExtensionFilter;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
/**
 * Multi-area processing panel (PDF multi-region text recognition).
 */
public class MultipleAreaProcessPanel {
    // Theme colours handed in through the constructor.
    private Color BACKGROUND_COLOR;
    private Color PRIMARY_COLOR;
    private Color TEXT_COLOR;
    // Backing model and list widget for the chosen PDF files; both are created in initPanel().
    private DefaultListModel<String> batchFileListModel;
    private JList<String> batchFileList;
    // Read-only text area showing the processing log; created in initPanel().
    private JTextArea batchLogArea;
    private List<RectArea> selectedAreas;    // holds the selected area definitions
    // OCR backend used to recognise text in cropped images.
    private OcrService ocrService;
    // Base font for the widgets of this panel.
    private Font DEFAULT_FONT;
    // Host application window; passed as the parent component when dialogs are shown.
    private FileProcessorApp supper;
    public MultipleAreaProcessPanel(
            FileProcessorApp supper,
            OcrService ocrService,
            Color BACKGROUND_COLOR,
            Color PRIMARY_COLOR,
            Color TEXT_COLOR,
            Font DEFAULT_FONT) {
        this.BACKGROUND_COLOR = BACKGROUND_COLOR;
        this.PRIMARY_COLOR = PRIMARY_COLOR;
        this.TEXT_COLOR = TEXT_COLOR;
        this.supper = supper;
        this.ocrService = ocrService;
        this.DEFAULT_FONT = DEFAULT_FONT;
    }
    public JPanel initPanel() {
        // åˆ›å»ºåŠŸèƒ½æ‰©å±•æ ‡ç­¾é¡µ
        JPanel extensionPanel = new JPanel(new BorderLayout(15, 15));
        extensionPanel.setBorder(new EmptyBorder(15, 15, 15, 15));
        extensionPanel.setBackground(BACKGROUND_COLOR);
        // é¡¶éƒ¨å¡ç‰‡ï¼šæ‰¹é‡å¤„理操作区域
        JPanel topCard2 = GenerateCustomizeComponent.createCardPanel();
        topCard2.setLayout(new BoxLayout(topCard2, BoxLayout.Y_AXIS));
        topCard2.setBorder(new EmptyBorder(20, 20, 20, 20));
        // æ·»åŠ æ ‡é¢˜ - å±…中显示
        JPanel titlePanel2 = new JPanel(new GridBagLayout());
        titlePanel2.setOpaque(false);
        JLabel titleLabel2 = new JLabel("PDF多区域文本识别");
        titleLabel2.setFont(new Font(DEFAULT_FONT.getName(), Font.BOLD, 18));
        titleLabel2.setForeground(PRIMARY_COLOR);
        titleLabel2.setBorder(new EmptyBorder(0, 0, 15, 0));
        titlePanel2.add(titleLabel2);
        topCard2.add(titlePanel2);
        topCard2.add(Box.createVerticalStrut(10));
        // æ‰¹é‡å¤„理按钮区域
        JPanel batchTopPanel = GenerateCustomizeComponent.createStyledPanel(new FlowLayout(FlowLayout.LEFT, 10, 10));
        JButton selectBatchFilesBtn = GenerateCustomizeComponent.createStyledButton("选择PDF文件", DEFAULT_FONT);
        JButton selectBatchAreaBtn = GenerateCustomizeComponent.createStyledButton("选择PDF区域", DEFAULT_FONT);
        JButton removeSelectedBtn = GenerateCustomizeComponent.createStyledButton("移除选中文件", DEFAULT_FONT);
        JButton clearAllBtn = GenerateCustomizeComponent.createStyledButton("清空列表", DEFAULT_FONT);
        JButton exportBatchBtn = GenerateCustomizeComponent.createPrimaryButton("处理文件", DEFAULT_FONT);
        batchTopPanel.add(selectBatchFilesBtn);
        batchTopPanel.add(selectBatchAreaBtn);
        batchTopPanel.add(removeSelectedBtn);
        batchTopPanel.add(clearAllBtn);
        batchTopPanel.add(exportBatchBtn);
        // å·²é€‰æ‹©æ–‡ä»¶åˆ—表区域
        JPanel fileListPanel = GenerateCustomizeComponent.createStyledPanel(new BorderLayout());
        fileListPanel.setBorder(new EmptyBorder(10, 0, 0, 0));
        JLabel fileListTitleLabel = new JLabel("已选择的PDF文件");
        fileListTitleLabel.setFont(new Font(DEFAULT_FONT.getName(), Font.BOLD, 14));
        fileListTitleLabel.setForeground(TEXT_COLOR);
        fileListTitleLabel.setBorder(new EmptyBorder(0, 0, 5, 0));
        batchFileListModel = new DefaultListModel<>();
        batchFileList = new JList<>(batchFileListModel);
        batchFileList.setSelectionMode(ListSelectionModel.MULTIPLE_INTERVAL_SELECTION);
        batchFileList.setFont(DEFAULT_FONT);
        JScrollPane fileListScrollPane = new JScrollPane(batchFileList);
        fileListScrollPane.setPreferredSize(new Dimension(-1, 150));
        fileListScrollPane.setBorder(new CompoundBorder(
                new LineBorder(new Color(220, 220, 220)),
                new EmptyBorder(5, 5, 5, 5)));
        fileListPanel.add(fileListTitleLabel, BorderLayout.NORTH);
        fileListPanel.add(fileListScrollPane, BorderLayout.CENTER);
        // æ·»åŠ åˆ°é¡¶éƒ¨å¡ç‰‡
        topCard2.add(batchTopPanel);
        topCard2.add(fileListPanel);
        // åº•部卡片:批量处理日志区域
        JPanel bottomCard2 = GenerateCustomizeComponent.createCardPanel();
        bottomCard2.setLayout(new BorderLayout());
        bottomCard2.setBorder(new EmptyBorder(15, 15, 15, 15));
        JLabel logTitleLabel2 = new JLabel("处理日志");
        logTitleLabel2.setFont(new Font(DEFAULT_FONT.getName(), Font.BOLD, 14));
        logTitleLabel2.setForeground(TEXT_COLOR);
        logTitleLabel2.setBorder(new EmptyBorder(0, 0, 10, 0));
        batchLogArea = new JTextArea();
        batchLogArea.setEditable(false);
        batchLogArea.setLineWrap(true);
        batchLogArea.setFont(DEFAULT_FONT);
        batchLogArea.setBackground(new Color(250, 250, 250));
        batchLogArea.setBorder(new CompoundBorder(
                new LineBorder(new Color(220, 220, 220)),
                new EmptyBorder(5, 5, 5, 5)));
        JScrollPane logScrollPane = new JScrollPane(batchLogArea);
        logScrollPane.setBorder(null);
        logScrollPane.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);
        bottomCard2.add(logTitleLabel2, BorderLayout.NORTH);
        bottomCard2.add(logScrollPane, BorderLayout.CENTER);
        // æ·»åŠ é¡¶éƒ¨å¡ç‰‡å’Œåº•éƒ¨å¡ç‰‡åˆ°åŠŸèƒ½æ‰©å±•æ ‡ç­¾é¡µ
        extensionPanel.add(topCard2, BorderLayout.NORTH);
        extensionPanel.add(bottomCard2, BorderLayout.CENTER);
        // æ·»åŠ äº‹ä»¶ç›‘å¬å™¨
        // ä¸ºæ‰¹é‡å¤„理按钮添加事件监听器
        selectBatchFilesBtn.addActionListener(e -> selectBatchFiles());
        selectBatchAreaBtn.addActionListener(e -> loadLastSelectedAreas());
        removeSelectedBtn.addActionListener(e -> removeSelectedBatchFiles());
        clearAllBtn.addActionListener(e -> clearAllBatchFiles());
         exportBatchBtn.addActionListener(e -> batchProcessAndExport());
        // æ·»åŠ ä¸€äº›åˆå§‹æ—¥å¿—ä¿¡æ¯ï¼ŒéªŒè¯æ—¥å¿—åŒºåŸŸæ˜¯å¦æ­£å¸¸å·¥ä½œ
        appendLog("PDF多区域文本识别工具已初始化");
        appendLog("请选择PDF文件并设置识别区域");
        return extensionPanel;
    }
    /**
     * æ‰¹é‡å¤„理文件方法
     */
    private void batchProcessAndExport(){
        if (batchFileListModel.isEmpty()) {
            JOptionPane.showMessageDialog(supper, "请先选择要处理的文件", "提示", JOptionPane.WARNING_MESSAGE);
            return;
        }
        if (selectedAreas == null || selectedAreas.isEmpty()) {
            JOptionPane.showMessageDialog(supper, "请先选择PDF区域", "提示", JOptionPane.WARNING_MESSAGE);
            return;
        }
        // æ˜¾ç¤ºæ–‡ä»¶é€‰æ‹©å¯¹è¯æ¡†è®©ç”¨æˆ·é€‰æ‹©è¾“出目录
        JFileChooser dirChooser = new JFileChooser();
        dirChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
        dirChooser.setDialogTitle("选择输出目录");
        int result = dirChooser.showDialog(null, "选择");
        if (result != JFileChooser.APPROVE_OPTION) {
            appendLog("用户取消了输出目录选择");
            return;
        }
        File outputDirectory = dirChooser.getSelectedFile();
        appendLog("输出目录: " + outputDirectory.getAbsolutePath());
        appendLog("开始处理文件...");
        // åˆ›å»ºä¸€ä¸ªçº¿ç¨‹æ± æ¥å¹¶è¡Œå¤„理文件
        SwingWorker<Void, String> worker = new SwingWorker<Void, String>() {
            @Override
            protected Void doInBackground() {
                int processedCount = 0;
                int successCount = 0;
                int failCount = 0;
                //初始化excel表头
                List<List<String>> tableHeader = new ArrayList<>();
                selectedAreas.forEach(s->tableHeader.add(Collections.singletonList(s.getName())));
                //识别到的数据
                List<List<String>> tableData = new ArrayList<>();
                // éåŽ†æ‰€æœ‰é€‰æ‹©çš„æ–‡ä»¶
                for (int i = 0; i < batchFileListModel.size(); i++) {
                    String listItem = batchFileListModel.getElementAt(i);
                    // ä»Žåˆ—表项中提取文件路径(假设格式为 "文件名 (路径)")
                    int startIndex = listItem.lastIndexOf('(') + 1;
                    int endIndex = listItem.lastIndexOf(')');
                    if (startIndex > 0 && endIndex > startIndex) {
                        String filePath = listItem.substring(startIndex, endIndex);
                        File pdfFile = new File(filePath);
                        processedCount++;
                        try {
                            // å¤„理单个PDF文件
                            List<String> ocrResults = processSinglePdfFile(pdfFile, outputDirectory);
                            tableData.add(ocrResults);
                            successCount++;
                            appendLog("文件处理成功(" + processedCount + "/" + batchFileListModel.size() + "): " + pdfFile.getName());
                        } catch (Exception e) {
                            failCount++;
                            appendLog("文件处理失败: " + pdfFile.getName() + " - " + e.getMessage());
                            e.printStackTrace();
                        }
                    }
                }
                //导出excel文件
                try {
                    String outputExcelFileName = "识别结果_" + LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd")) + ExcelTypeEnum.XLSX.getValue();
                    File outputExcelFile = new File(outputDirectory, outputExcelFileName);
                    if (!outputExcelFile.getParentFile().exists()) {
                        outputExcelFile.getParentFile().mkdirs();
                    }
                    EasyExcel.write(outputExcelFile)
                            .head(tableHeader)
                            .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
                            .sheet()
                            .doWrite(tableData);
                    appendLog("文件已导出到: " + outputExcelFile.getAbsolutePath());
                } catch (Exception e) {
                    appendLog("文件导出失败: " + e.getMessage());
                }
                // è¾“出处理统计信息
                publish("成功: " + successCount + ",错误: " + failCount);
                return null;
            }
            @Override
            protected void process(List<String> chunks) {
                for (String message : chunks) {
                    appendLog(message);
                }
            }
            @Override
            protected void done() {
                appendLog("所有文件处理完成");
            }
        };
        worker.execute();
    }
    /**
     * å¤„理单个PDF文件
     */
    private List<String> processSinglePdfFile(File pdfFile, File outputDirectory) throws IOException {
        // ç¡®ä¿è¾“出目录存在
        if (!outputDirectory.exists()) {
            outputDirectory.mkdirs();
        }
        // åˆ›å»ºå½“前PDF文件的结果目录
        String fileNameWithoutExt = pdfFile.getName().substring(0, pdfFile.getName().lastIndexOf('.'));
        // åŠ è½½PDF文档
        PDDocument document = null;
        try {
            document = PDDocument.load(pdfFile);
            PDFRenderer renderer = new PDFRenderer(document);
            List<String> ocrResults = new ArrayList<>();
            // å¤„理每个已选择的区域
            for (RectArea area : selectedAreas) {
                int pageIndex = area.getPageIndex();
                if (pageIndex >= document.getNumberOfPages()) {
                    appendLog("警告: åŒºåŸŸ\"" + area.getName() + "\"指定的页码不存在,将跳过此区域");
                    continue;
                }
                // æ¸²æŸ“当前页
                BufferedImage pageImage = renderer.renderImageWithDPI(pageIndex, 72);
                // æˆªå–区域图像
                BufferedImage areaImage = extractAreaImage(pageImage, area);
                // ç¼“存区域图像
                //保存图片
                File cacheDir = OcrSwingConstants.cacheDir;
                String outputFilePath =cacheDir.getAbsolutePath() + File.separator + UUID.randomUUID() + ".png";
                boolean saved = ToFile.saveImage(areaImage, outputFilePath, "png");
//                ImageIO.write(areaImage, "PNG", areaImageFile);
                if(saved){
                    // å¯¹åŒºåŸŸå›¾åƒè¿›è¡ŒOCR识别
                    String ocrResult = recognizeAreaText(new File(outputFilePath));
                    ocrResults.add(ocrResult);
                }
            }
            return ocrResults;
        } finally {
            if (document != null) {
                document.close();
            }
            //删除临时目录
            ToFile.deleteTempFiles(OcrSwingConstants.cacheDir);
        }
    }
    /**
     * ä»Žé¡µé¢å›¾åƒä¸­æˆªå–指定区域的图像
     */
    private BufferedImage extractAreaImage(BufferedImage pageImage, RectArea area) {
        // ç¡®ä¿æˆªå–区域在图像范围内
        int x = Math.max(0, area.getX());
        int y = Math.max(0, area.getY());
        int width = Math.min(area.getWidth(), pageImage.getWidth() - x);
        int height = Math.min(area.getHeight(), pageImage.getHeight() - y);
        // åˆ›å»ºæˆªå–的图像
        return pageImage.getSubimage(x, y, width, height);
    }
    /**
     * å¯¹åŒºåŸŸå›¾åƒè¿›è¡ŒOCR文本识别,返回识别到的第一个结果
     */
    private String recognizeAreaText(File imageFile) throws IOException {
        // ä½¿ç”¨ocrService进行文本识别
        String fullText  = ocrService.ocr(imageFile.getAbsolutePath());
        if(fullText != null && !fullText.isEmpty()){
            fullText = FileNameValidator.validateAndCleanFileName(fullText);
        }
        return fullText;
    }
    // Path of the PDF chosen as the template for area selection
    private String selectedTemplatePdfPath = null;
    // ObjectMapper used to serialize and deserialize the area list as JSON
    private static final ObjectMapper objectMapper = new ObjectMapper();
    // Directory and file name of the configuration file that stores the area definitions
    private static final String CONFIG_DIR = OcrSwingConstants.pdfToolDir.getAbsolutePath();
    private static final String CONFIG_FILE = "template_areas.json";
    /**
     * è¡¨ç¤ºPDF中的一个矩形区域
     * æ”¯æŒJSON序列化和反序列化
     */
    public static class RectArea {
        private int pageIndex; // é¡µç ç´¢å¼•
        private int x; // GUI中的左上角x像素坐标
        private int y; // GUI中的左上角y像素坐标
        private int width; // GUI中的宽度像素
        private int height; // GUI中的高度像素
        private String name; // åŒºåŸŸåç§°
        private float pdfX; // PDF中的左上角x坐标(点)
        private float pdfY; // PDF中的左上角y坐标(点)
        private float pdfWidth; // PDF中的宽度(点)
        private float pdfHeight;// PDF中的高度(点)
        // æ— å‚构造函数,用于JSON反序列化
        public RectArea() {
        }
        public RectArea(int pageIndex, int x, int y, int width, int height, String name) {
            this.pageIndex = pageIndex;
            this.x = x;
            this.y = y;
            this.width = width;
            this.height = height;
            this.name = name;
            // è¿™äº›PDF坐标会在转换时设置
            this.pdfX = 0;
            this.pdfY = 0;
            this.pdfWidth = 0;
            this.pdfHeight = 0;
        }
        // è®¾ç½®PDF坐标
        public void setPdfCoordinates(float pdfX, float pdfY, float pdfWidth, float pdfHeight) {
            this.pdfX = pdfX;
            this.pdfY = pdfY;
            this.pdfWidth = pdfWidth;
            this.pdfHeight = pdfHeight;
        }
        @Override
        public String toString() {
            return "页面" + (pageIndex + 1) + " - " + name +
                    " [PDF: (" + String.format("%.2f", pdfX) + "," +
                    String.format("%.2f", pdfY) + "," +
                    String.format("%.2f", pdfWidth) + "," +
                    String.format("%.2f", pdfHeight) + ")]";
        }
        // Getters and setters for JSON serialization
        public int getPageIndex() {
            return pageIndex;
        }
        public void setPageIndex(int pageIndex) {
            this.pageIndex = pageIndex;
        }
        public int getX() {
            return x;
        }
        public void setX(int x) {
            this.x = x;
        }
        public int getY() {
            return y;
        }
        public void setY(int y) {
            this.y = y;
        }
        public int getWidth() {
            return width;
        }
        public void setWidth(int width) {
            this.width = width;
        }
        public int getHeight() {
            return height;
        }
        public void setHeight(int height) {
            this.height = height;
        }
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public float getPdfX() {
            return pdfX;
        }
        public void setPdfX(float pdfX) {
            this.pdfX = pdfX;
        }
        public float getPdfY() {
            return pdfY;
        }
        public void setPdfY(float pdfY) {
            this.pdfY = pdfY;
        }
        public float getPdfWidth() {
            return pdfWidth;
        }
        public void setPdfWidth(float pdfWidth) {
            this.pdfWidth = pdfWidth;
        }
        public float getPdfHeight() {
            return pdfHeight;
        }
        public void setPdfHeight(float pdfHeight) {
            this.pdfHeight = pdfHeight;
        }
    }
    /**
     * å°è¯•加载上次保存的区域信息,询问用户是否使用
     */
    private void selectBatchPdfArea() {
        selectBatchPdfArea(true);
    }
    /**
     * å°è¯•加载上次保存的区域信息,询问用户是否使用
     */
    private void loadLastSelectedAreas() {
        try {
            File configDir = new File(CONFIG_DIR);
            File configFile = new File(configDir, CONFIG_FILE);
            if (!configFile.exists()) {
                // æ²¡æœ‰é…ç½®æ–‡ä»¶ï¼Œæ‰§è¡ŒåŽŸå§‹çš„é€‰æ‹©æµç¨‹
                selectBatchPdfArea(true);
                return;
            }
            // ç›´æŽ¥è¯»å–区域列表,不再关联模板文件
            TypeReference<List<RectArea>> typeRef = new TypeReference<List<RectArea>>() {};
            List<RectArea> areas = objectMapper.readValue(configFile, typeRef);
            if (areas.isEmpty()) {
                // é…ç½®æ–‡ä»¶ä¸ºç©ºï¼Œæ‰§è¡ŒåŽŸå§‹çš„é€‰æ‹©æµç¨‹
                selectBatchPdfArea(true);
                return;
            }
            // è¯¢é—®ç”¨æˆ·æ˜¯å¦ä½¿ç”¨ä¸Šæ¬¡çš„区域配置
            int choice = JOptionPane.showConfirmDialog(
                    null,  // ä½¿ç”¨null作为父组件
                    "检测到上次保存的区域配置,是否使用?\n\n" +
                    "区域数量: " + areas.size() + " ä¸ª",
                    "使用上次的区域配置",
                    JOptionPane.YES_NO_OPTION,
                    JOptionPane.QUESTION_MESSAGE
            );
            if (choice == JOptionPane.YES_OPTION) {
                // ç”¨æˆ·é€‰æ‹©ä½¿ç”¨ä¸Šæ¬¡çš„配置,但不设置selectedTemplatePdfPath
                selectedAreas = areas;
                appendLog("已加载上次保存的区域配置");
                // åœ¨æ—¥å¿—中显示每个区域的信息
//                for (RectArea area : selectedAreas) {
//                    appendLog("区域: " + area.toString());
//                }
            } else {
                // ç”¨æˆ·é€‰æ‹©ä¸ä½¿ç”¨ä¸Šæ¬¡çš„配置,执行原始的选择流程
                appendLog("用户选择不使用上次的区域配置");
                selectBatchPdfArea(false);
            }
        } catch (Exception e) {
            appendLog("加载上次保存的区域配置失败: " + e.getMessage());
            e.printStackTrace();
            // å‘生异常时,继续使用原始的选择流程
            selectBatchPdfArea(true);
        }
    }
    /**
     * ä¿å­˜åŒºåŸŸä¿¡æ¯åˆ°é…ç½®æ–‡ä»¶
     */
    private void saveAreasToConfig() {
        if (selectedAreas == null || selectedAreas.isEmpty()) {
            return;
        }
        try {
            // åˆ›å»ºé…ç½®ç›®å½•
            File configDir = new File(CONFIG_DIR);
            if (!configDir.exists()) {
                configDir.mkdirs();
            }
            // ç›´æŽ¥ä¿å­˜åŒºåŸŸåˆ—表,不再关联模板文件
            File configFile = new File(configDir, CONFIG_FILE);
            objectMapper.writeValue(configFile, selectedAreas);
            appendLog("区域配置已保存到文件");
        } catch (Exception e) {
            appendLog("保存区域配置失败: " + e.getMessage());
            e.printStackTrace();
        }
    }
    /**
     * ä»Žé…ç½®æ–‡ä»¶åŠ è½½åŒºåŸŸä¿¡æ¯
     */
    private List<RectArea> loadAreasFromConfig(String templatePath) {
        List<RectArea> areas = new ArrayList<>();
        try {
            File configDir = new File(CONFIG_DIR);
            File configFile = new File(configDir, CONFIG_FILE);
            if (!configFile.exists()) {
                return areas;
            }
            // ç›´æŽ¥è¯»å–区域列表,不再检查模板路径
            TypeReference<List<RectArea>> typeRef = new TypeReference<List<RectArea>>() {};
            areas = objectMapper.readValue(configFile, typeRef);
            if (!areas.isEmpty()) {
                appendLog("已加载 " + areas.size() + " ä¸ªä¿å­˜çš„区域配置");
            }
        } catch (Exception e) {
            appendLog("加载区域配置失败: " + e.getMessage());
            e.printStackTrace();
        }
        return areas;
    }
    /**
     * é€‰æ‹©PDF区域方法
     * @param loadSavedAreas æ˜¯å¦å°è¯•加载已保存的区域配置
     */
    private void selectBatchPdfArea(boolean loadSavedAreas) {
        // åˆ›å»ºæ–‡ä»¶é€‰æ‹©å™¨é€‰æ‹©æ¨¡æ¿PDF
        JFileChooser fileChooser = new JFileChooser();
        fileChooser.setFileFilter(new FileNameExtensionFilter("PDF文件 (*.pdf)", "pdf"));
        int result = fileChooser.showOpenDialog(supper);
        if (result == JFileChooser.APPROVE_OPTION) {
            File selectedFile = fileChooser.getSelectedFile();
            selectedTemplatePdfPath = selectedFile.getAbsolutePath();
            // æ˜¯å¦å°è¯•加载已保存的区域配置
            if (loadSavedAreas) {
                // é¦–先尝试加载已保存的区域配置
                List<RectArea> savedAreas = loadAreasFromConfig(selectedTemplatePdfPath);
                if (!savedAreas.isEmpty()) {
                    // å¦‚果有保存的区域信息,直接使用
                    selectedAreas = savedAreas;
                    appendLog("使用已保存的区域配置");
                    return;
                }
                // å¦‚果尝试加载但没有保存的区域信息,继续执行下面的代码打开对话框
            }
            // æ— è®ºloadSavedAreas是什么值,只要没有保存的区域信息或用户选择不加载已保存的配置,都打开区域选择对话框
            PdfAreaSelectionDialog dialog = new PdfAreaSelectionDialog(selectedTemplatePdfPath);
            dialog.setModal(true);
            dialog.setVisible(true);
            if (dialog.isConfirmed()) {
                // èŽ·å–ç”¨æˆ·é€‰æ‹©çš„åŒºåŸŸ
                selectedAreas = dialog.getSelectedAreas();
                appendLog("已选择 " + selectedAreas.size() + " ä¸ªPDF区域");
                // ä¿å­˜ç”¨æˆ·é€‰æ‹©çš„区域配置
                saveAreasToConfig();
            }
        } else {
            appendLog("用户取消了模板PDF选择");
        }
    }
    /**
     * PDF区域选择对话框
     */
    private class PdfAreaSelectionDialog extends JDialog {
        private PDDocument document;
        private int totalPages;
        private int currentPageIndex = 0;
        private List<RectArea> areas = new ArrayList<>();
        private boolean confirmed = false;
        private JPanel pdfPreviewPanel;
        private DefaultListModel<String> areaListModel;
        private JList<String> areaList;
        private BufferedImage currentImage; // å½“前页面的图像
        public PdfAreaSelectionDialog(String pdfPath) {
            setTitle("选择PDF识别区域");
            setSize(900, 700);
            setLocationRelativeTo(null);
            try {
                // åŠ è½½PDF文档
                document = PDDocument.load(new File(pdfPath));
                totalPages = document.getNumberOfPages();
                // å°è¯•从配置文件加载已保存的区域信息
                List<RectArea> savedAreas = loadAreasFromConfig(pdfPath);
                if (!savedAreas.isEmpty()) {
                    areas = savedAreas;
                    appendLog("已加载 " + savedAreas.size() + " ä¸ªä¿å­˜çš„区域配置到编辑对话框");
                }
            } catch (IOException ex) {
                appendLog("加载PDF失败: " + ex.getMessage());
                JOptionPane.showMessageDialog(this, "加载PDF失败: " + ex.getMessage(), "错误", JOptionPane.ERROR_MESSAGE);
                dispose();
                return;
            }
            // åˆ›å»ºä¸»é¢æ¿
            JPanel mainPanel = new JPanel(new BorderLayout(10, 10));
            mainPanel.setBorder(new EmptyBorder(10, 10, 10, 10));
            // åˆ›å»ºPDF预览区域
            JPanel previewPanel = new JPanel(new BorderLayout(5, 5));
            // é¡µé¢æŽ§åˆ¶æŒ‰é’®
            JPanel pageControlPanel = new JPanel(new FlowLayout(FlowLayout.CENTER, 10, 5));
            JButton prevPageBtn = new JButton("上一页");
            JButton nextPageBtn = new JButton("下一页");
            JLabel pageLabel = new JLabel("页面: 1 / " + totalPages);
            pageControlPanel.add(prevPageBtn);
            pageControlPanel.add(pageLabel);
            pageControlPanel.add(nextPageBtn);
            // åˆ›å»ºå¯æ»šåŠ¨çš„PDF预览面板
            JScrollPane scrollablePreview = new JScrollPane();
            scrollablePreview.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
            scrollablePreview.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED);
            scrollablePreview.setPreferredSize(new Dimension(639, 700));
            scrollablePreview.getVerticalScrollBar().setUnitIncrement(16); // è®¾ç½®æ»šåŠ¨é€Ÿåº¦
            scrollablePreview.setBorder(BorderFactory.createEtchedBorder());
            // PDF预览面板
            pdfPreviewPanel = new JPanel() {
                private Point startPoint;
                private Point endPoint;
                private boolean isDrawing = false;
                @Override
                protected void paintComponent(Graphics g) {
                    super.paintComponent(g);
                    // é¦–先绘制PDF图像
                    if (currentImage != null) {
                        g.drawImage(currentImage, 0, 0, this);
                    }
                    // ç»˜åˆ¶é€‰æ‹©æ¡†ï¼ˆè¿›è¡Œä¸­çš„选择)
                    if (isDrawing && startPoint != null && endPoint != null) {
                        // ç»˜åˆ¶é€‰æ‹©æ¡†
                        Graphics2D g2d = (Graphics2D) g;
                        g2d.setColor(new Color(255, 0, 0, 100)); // ä½¿ç”¨çº¢è‰²å¡«å……,更容易看到
                        int x = Math.min(startPoint.x, endPoint.x);
                        int y = Math.min(startPoint.y, endPoint.y);
                        int width = Math.abs(endPoint.x - startPoint.x);
                        int height = Math.abs(endPoint.y - startPoint.y);
                        g2d.fillRect(x, y, width, height);
                        g2d.setColor(Color.RED); // çº¢è‰²è¾¹æ¡†
                        g2d.setStroke(new BasicStroke(2)); // åŠ ç²—è¾¹æ¡†
                        g2d.drawRect(x, y, width, height);
                    }
                    // ç»˜åˆ¶å·²ä¿å­˜çš„区域
                    for (RectArea area : areas) {
                        if (area.pageIndex == currentPageIndex) {
                            Graphics2D g2d = (Graphics2D) g;
                            g2d.setColor(new Color(0, 255, 0, 80)); // ç»¿è‰²åŠé€æ˜Žå¡«å……
                            g2d.fillRect(area.x, area.y, area.width, area.height);
                            g2d.setColor(Color.GREEN); // ç»¿è‰²è¾¹æ¡†
                            g2d.setStroke(new BasicStroke(2)); // åŠ ç²—è¾¹æ¡†
                            g2d.drawRect(area.x, area.y, area.width, area.height);
                            // æ˜¾ç¤ºåŒºåŸŸåç§°
                            g2d.setColor(Color.BLUE);
                            g2d.setFont(new Font("宋体", Font.BOLD, 12));
                            g2d.drawString(area.name, area.x + 5, area.y + 15);
                        }
                    }
                }
                { // åˆå§‹åŒ–鼠标事件
                    addMouseListener(new java.awt.event.MouseAdapter() {
                        @Override
                        public void mousePressed(java.awt.event.MouseEvent e) {
                            startPoint = e.getPoint();
                            endPoint = e.getPoint();
                            isDrawing = true;
                        }
                        @Override
                        public void mouseReleased(java.awt.event.MouseEvent e) {
                            endPoint = e.getPoint();
                            isDrawing = false;
                            // è®¡ç®—选择的区域
                            int x = Math.min(startPoint.x, endPoint.x);
                            int y = Math.min(startPoint.y, endPoint.y);
                            int width = Math.abs(endPoint.x - startPoint.x);
                            int height = Math.abs(endPoint.y - startPoint.y);
                            // å¦‚果选择的区域足够大,添加到列表
                            if (width > 10 && height > 10) {
                                // ç¡®ä¿å¯¹è¯æ¡†åœ¨æœ€ä¸Šå±‚
                                SwingUtilities.invokeLater(() -> {
                                    String areaName = JOptionPane.showInputDialog(
                                            PdfAreaSelectionDialog.this,
                                            "请输入区域名称:",
                                            "区域名称",
                                            JOptionPane.PLAIN_MESSAGE);
                                    if (areaName != null && !areaName.trim().isEmpty()) {
                                        // åˆ›å»ºæ–°åŒºåŸŸ
                                        RectArea newArea = new RectArea(currentPageIndex, x, y, width, height,
                                                areaName.trim());
                                        areas.add(newArea);
                                        // ç«‹å³è½¬æ¢ä¸ºPDF坐标
                                        try {
                                            PDPage page = document.getPage(currentPageIndex);
                                            org.apache.pdfbox.pdmodel.common.PDRectangle mediaBox = page.getMediaBox();
                                            float pageWidth = mediaBox.getWidth();
                                            float pageHeight = mediaBox.getHeight();
                                            // ä½¿ç”¨å½“前已渲染的图像尺寸进行转换
                                            if (currentImage != null) {
                                                int imageWidth = currentImage.getWidth();
                                                int imageHeight = currentImage.getHeight();
                                                float xScaleFactor = pageWidth / imageWidth;
                                                float yScaleFactor = pageHeight / imageHeight;
                                                float pdfX = x * xScaleFactor;
                                                float pdfY = pageHeight - (y + height) * yScaleFactor;
                                                float pdfWidth = width * xScaleFactor;
                                                float pdfHeight = height * yScaleFactor;
                                                newArea.setPdfCoordinates(pdfX, pdfY, pdfWidth, pdfHeight);
                                            }
                                        } catch (Exception ex) {
                                            appendLog("添加区域时坐标转换失败: " + ex.getMessage());
                                        }
                                        updateAreaList();
                                        pdfPreviewPanel.repaint();
                                    }
                                });
                            }
                            repaint();
                        }
                    });
                    addMouseMotionListener(new java.awt.event.MouseMotionAdapter() {
                        @Override
                        public void mouseDragged(java.awt.event.MouseEvent e) {
                            endPoint = e.getPoint();
                            repaint();
                        }
                    });
                }
            };
            pdfPreviewPanel.setSize(new Dimension(599,750));
            scrollablePreview.setViewportView(pdfPreviewPanel);
            // åˆ›å»ºå¸¦è¦†ç›–层的预览面板
            JPanel previewWithOverlay = new JPanel(new BorderLayout());
            previewWithOverlay.add(scrollablePreview, BorderLayout.CENTER);
            // åŠ è½½ç¬¬ä¸€é¡µ
            loadPage(currentPageIndex);
            // é¡µé¢æŽ§åˆ¶äº‹ä»¶
            prevPageBtn.addActionListener(e -> {
                if (currentPageIndex > 0) {
                    currentPageIndex--;
                    loadPage(currentPageIndex);
                    pageLabel.setText("页面: " + (currentPageIndex + 1) + " / " + totalPages);
                }
            });
            nextPageBtn.addActionListener(e -> {
                if (currentPageIndex < totalPages - 1) {
                    currentPageIndex++;
                    loadPage(currentPageIndex);
                    pageLabel.setText("页面: " + (currentPageIndex + 1) + " / " + totalPages);
                }
            });
            // æ·»åŠ åˆ°é¢„è§ˆé¢æ¿
            previewPanel.add(pageControlPanel, BorderLayout.NORTH);
            previewPanel.add(previewWithOverlay, BorderLayout.CENTER);
            // åˆ›å»ºåŒºåŸŸåˆ—表和控制按钮
            JPanel rightPanel = new JPanel(new BorderLayout(5, 5));
            rightPanel.setPreferredSize(new Dimension(250, -1));
            // åŒºåŸŸåˆ—表
            JPanel areaListPanel = new JPanel(new BorderLayout(5, 5));
            JLabel areaListLabel = new JLabel("已选择的区域");
            areaListModel = new DefaultListModel<>();
            areaList = new JList<>(areaListModel);
            JScrollPane areaListScrollPane = new JScrollPane(areaList);
            areaListScrollPane.setPreferredSize(new Dimension(-1, 200));
            areaListPanel.add(areaListLabel, BorderLayout.NORTH);
            areaListPanel.add(areaListScrollPane, BorderLayout.CENTER);
            // æŽ§åˆ¶æŒ‰é’®
            JPanel controlPanel = new JPanel(new FlowLayout(FlowLayout.CENTER, 10, 10));
            JButton removeAreaBtn = new JButton("移除选中区域");
            JButton clearAreasBtn = new JButton("清空所有区域");
            controlPanel.add(removeAreaBtn);
            controlPanel.add(clearAreasBtn);
            // ç¡®è®¤å’Œå–消按钮
            JPanel confirmPanel = new JPanel(new FlowLayout(FlowLayout.RIGHT, 10, 10));
            JButton confirmBtn = new JButton("确认");
            JButton cancelBtn = new JButton("取消");
            // æ¸²æŸ“时使用的DPI值
            final float RENDER_DPI = 72.0f;
            // PDF默认DPI
            final float PDF_DPI = 72.0f;
            // åƒç´ åˆ°PDF点的转换因子
            float pixelToPointFactor = PDF_DPI / RENDER_DPI;
            confirmPanel.add(confirmBtn);
            confirmPanel.add(cancelBtn);
            // æ·»åŠ åˆ°å³ä¾§é¢æ¿
            rightPanel.add(areaListPanel, BorderLayout.NORTH);
            rightPanel.add(controlPanel, BorderLayout.CENTER);
            rightPanel.add(confirmPanel, BorderLayout.SOUTH);
            // æ·»åŠ åˆ°ä¸»é¢æ¿
            mainPanel.add(previewPanel, BorderLayout.CENTER);
            mainPanel.add(rightPanel, BorderLayout.EAST);
            // æ·»åŠ äº‹ä»¶ç›‘å¬å™¨
            removeAreaBtn.addActionListener(e -> {
                int[] selectedIndices = areaList.getSelectedIndices();
                if (selectedIndices != null && selectedIndices.length > 0) {
                    // ä»ŽåŽå¾€å‰åˆ é™¤ï¼Œé¿å…ç´¢å¼•æ··ä¹±
                    for (int i = selectedIndices.length - 1; i >= 0; i--) {
                        areas.remove(selectedIndices[i]);
                    }
                    updateAreaList();
                    pdfPreviewPanel.repaint(); // é‡ç»˜é¢„览面板以更新区域显示
                }
            });
            clearAreasBtn.addActionListener(e -> {
                areas.clear();
                updateAreaList();
                pdfPreviewPanel.repaint(); // é‡ç»˜é¢„览面板以清除所有区域
            });
            confirmBtn.addActionListener(e -> {
                // åœ¨ç¡®è®¤ä¹‹å‰ï¼Œå°†æ‰€æœ‰åŒºåŸŸçš„GUI坐标转换为PDF坐标
                try {
                    if (areas.isEmpty()) {
                        appendLog("没有选择任何区域,无需转换坐标");
                    } else {
                        for (RectArea area : areas) {
                            PDPage page = document.getPage(area.pageIndex);
                            // èŽ·å–PDF页面的媒体框(实际尺寸)
                            org.apache.pdfbox.pdmodel.common.PDRectangle mediaBox = page.getMediaBox();
                            float pageWidth = mediaBox.getWidth(); // PDF页面宽度(点)
                            float pageHeight = mediaBox.getHeight(); // PDF页面高度(点)
                            // appendLog("PDF页面尺寸: å®½åº¦=" + pageWidth + "点, é«˜åº¦=" + pageHeight + "点");
                            // é‡æ–°æ¸²æŸ“当前页面以获取准确的图像尺寸
                            PDFRenderer renderer = new PDFRenderer(document);
                            BufferedImage pageImage = renderer.renderImageWithDPI(area.pageIndex, RENDER_DPI);
                            int imageWidth = pageImage.getWidth(); // æ¸²æŸ“图像宽度(像素)
                            int imageHeight = pageImage.getHeight(); // æ¸²æŸ“图像高度(像素)
                            // appendLog("渲染图像尺寸: å®½åº¦=" + imageWidth + "像素, é«˜åº¦=" + imageHeight + "像素");
                            // è®¡ç®—水平和垂直方向的转换因子(像素到点)
                            float xScaleFactor = pageWidth / imageWidth;
                            float yScaleFactor = pageHeight / imageHeight;
                            // appendLog("转换因子: x=" + xScaleFactor + ", y=" + yScaleFactor);
                            // è½¬æ¢x坐标和宽度(水平方向)
                            float pdfX = area.x * xScaleFactor;
                            float pdfWidth = area.width * xScaleFactor;
                            // è½¬æ¢y坐标(考虑坐标系方向差异)
                            // PDF坐标系原点在左下角,Swing在左上角
                            float pdfY = area.y * yScaleFactor;
                            float pdfHeight = area.height * yScaleFactor;
                            // è®¾ç½®PDF坐标
                            area.setPdfCoordinates(pdfX, pdfY, pdfWidth, pdfHeight);
                            // appendLog("转换后的PDF坐标: (" + pdfX + "," + pdfY + "," + pdfWidth + "," +
                            // pdfHeight + ")");
                        }
                        // æ›´æ–°åŒºåŸŸåˆ—表,确保显示最新的PDF坐标
                        updateAreaList();
                    }
                } catch (Exception ex) {
                    appendLog("坐标转换失败: " + ex.getMessage());
                    ex.printStackTrace(); // æ‰“印异常堆栈,方便调试
                }
                confirmed = true;
                dispose();
            });
            cancelBtn.addActionListener(e -> {
                confirmed = false;
                dispose();
            });
            setContentPane(mainPanel);
        }
        // åŠ è½½PDF页面
        private void loadPage(int pageIndex) {
            try {
                PDFRenderer renderer = new PDFRenderer(document);
                currentImage = renderer.renderImageWithDPI(pageIndex, 72);
                // è®¾ç½®é¢æ¿çš„首选大小为图像大小
                pdfPreviewPanel.setPreferredSize(new Dimension(currentImage.getWidth(), currentImage.getHeight()));
                pdfPreviewPanel.revalidate();
                pdfPreviewPanel.repaint();
            } catch (IOException ex) {
                appendLog("加载PDF页面失败: " + ex.getMessage());
            }
        }
        // æ›´æ–°åŒºåŸŸåˆ—表
        private void updateAreaList() {
            areaListModel.clear();
            for (RectArea area : areas) {
                areaListModel.addElement(area.toString());
            }
            // ç¡®ä¿é¢„览面板正确显示当前页面的所有区域
            pdfPreviewPanel.repaint();
        }
        /**
         * Reports the value of the {@code confirmed} flag.
         * <p>
         * The confirm button handler sets the flag to {@code true} and the cancel button
         * handler sets it to {@code false} before the dialog is disposed.
         *
         * @return the current value of {@code confirmed}
         */
        public boolean isConfirmed() {
            return confirmed;
        }
        // é‡å†™dispose方法以确保关闭PDF文档
        @Override
        public void dispose() {
            super.dispose();
            if (document != null) {
                try {
                    document.close();
                } catch (Exception e) {
                    // å¿½ç•¥å…³é—­å¼‚常
                }
            }
        }
        // èŽ·å–ç”¨æˆ·é€‰æ‹©çš„åŒºåŸŸåˆ—è¡¨
        public List<RectArea> getSelectedAreas() {
            return new ArrayList<>(areas);
        }
    }
    /**
     * é€‰æ‹©PDF文件方法
     */
    private void selectBatchFiles() {
        // åˆ›å»ºæ–‡ä»¶é€‰æ‹©å™¨
        JFileChooser fileChooser = new JFileChooser();
        // è®¾ç½®å¤šé€‰æ¨¡å¼
        fileChooser.setMultiSelectionEnabled(true);
        // è®¾ç½®æ–‡ä»¶è¿‡æ»¤å™¨ï¼Œåªæ˜¾ç¤ºPDF文件
        fileChooser.setFileFilter(new FileNameExtensionFilter("PDF文件 (*.pdf)", "pdf"));
        // æ˜¾ç¤ºæ–‡ä»¶é€‰æ‹©å¯¹è¯æ¡†
        int result = fileChooser.showOpenDialog(supper);
        if (result == JFileChooser.APPROVE_OPTION) {
            // èŽ·å–ç”¨æˆ·é€‰æ‹©çš„æ–‡ä»¶
            File[] selectedFiles = fileChooser.getSelectedFiles();
            if (selectedFiles != null && selectedFiles.length > 0) {
                int addedCount = 0;
                // éåŽ†é€‰æ‹©çš„æ–‡ä»¶å¹¶æ·»åŠ åˆ°åˆ—è¡¨
                for (File file : selectedFiles) {
                    String filePath = file.getAbsolutePath();
                    String fileName = file.getName();
                    // æ£€æŸ¥æ–‡ä»¶æ˜¯å¦å·²ç»åœ¨åˆ—表中
                    boolean isExist = false;
                    for (int i = 0; i < batchFileListModel.size(); i++) {
                        if (batchFileListModel.getElementAt(i).contains(filePath)) {
                            isExist = true;
                            break;
                        }
                    }
                    if (!isExist) {
                        // å°†æ–‡ä»¶æ·»åŠ åˆ°åˆ—è¡¨ä¸­ï¼Œæ˜¾ç¤ºæ–‡ä»¶åå’Œè·¯å¾„
                        batchFileListModel.addElement(fileName + " (" + filePath + ")");
                        addedCount++;
                    }
                }
                // è®°å½•日志
                appendLog("成功添加 " + addedCount + " ä¸ªPDF文件到列表");
            }
        } else {
            // ç”¨æˆ·å–消了选择
            appendLog("用户取消了文件选择");
        }
    }
    /**
     * ç§»é™¤é€‰ä¸­çš„PDF文件
     */
    private void removeSelectedBatchFiles() {
        // èŽ·å–é€‰ä¸­çš„ç´¢å¼•
        int[] selectedIndices = batchFileList.getSelectedIndices();
        if (selectedIndices != null && selectedIndices.length > 0) {
            // ä»ŽåŽå¾€å‰åˆ é™¤ï¼Œé¿å…ç´¢å¼•æ··ä¹±
            for (int i = selectedIndices.length - 1; i >= 0; i--) {
                batchFileListModel.remove(selectedIndices[i]);
            }
            // è®°å½•日志
            appendLog("成功移除 " + selectedIndices.length + " ä¸ªé€‰ä¸­çš„PDF文件");
        } else {
            // æ²¡æœ‰é€‰ä¸­ä»»ä½•文件
            appendLog("请先选择要移除的PDF文件");
        }
    }
    /**
     * æ¸…空所有PDF文件列表
     */
    private void clearAllBatchFiles() {
        if (batchFileListModel.size() > 0) {
            // è®°å½•要清空的文件数量
            int fileCount = batchFileListModel.size();
            // æ¸…空列表
            batchFileListModel.clear();
            // è®°å½•日志
            appendLog("成功清空所有 " + fileCount + " ä¸ªPDF文件");
        } else {
            // åˆ—表已经为空
            appendLog("文件列表已经为空");
        }
    }
    /**
     * å‘日志区域添加信息
     */
    public void appendLog(String message) {
        SwingUtilities.invokeLater(() -> {
        String timestamp = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new java.util.Date());
        batchLogArea.append("[" + timestamp + "] " + message + "\n");
        // è‡ªåŠ¨æ»šåŠ¨åˆ°åº•éƒ¨
        batchLogArea.setCaretPosition(batchLogArea.getDocument().getLength());
        });
    }
}
src/main/java/com/xindao/ocr/swingui/swing/jpanel/PdfPreviewPanel.java
对比新文件
@@ -0,0 +1,202 @@
package com.xindao.ocr.swingui.swing.jpanel;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.swing.*;
import java.awt.*;
import java.awt.event.MouseAdapter;
import java.awt.event.MouseEvent;
import java.awt.event.MouseMotionAdapter;
import java.awt.geom.AffineTransform;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.IOException;
public class PdfPreviewPanel extends JPanel {
        private PDDocument document;
        private int pageNumber;
        private BufferedImage pageImage;
        private Rectangle2D selection = null;
        private Rectangle2D lastSelection = null; // ä¸Šæ¬¡é€‰æ‹©çš„区域
        private Point startDrag = null;
        private float scale = 1.0f;
        private Point translation = new Point(0, 0); // å¹³ç§»é‡
        private Point lastDragPoint = null;
        private float pdfWidth;
        private float pdfHeight;
        private boolean isDraggingPage = false; // æ˜¯å¦æ­£åœ¨æ‹–拽页面
        private static final Color SELECTION_COLOR = new Color(255, 193, 7, 100); // åŠé€æ˜Žé»„色
        private static final Color LAST_SELECTION_COLOR = new Color(76, 175, 80, 100); // åŠé€æ˜Žç»¿è‰²ï¼Œç”¨äºŽæ ‡è¯†ä¸Šæ¬¡é€‰æ‹©çš„区域
        public PdfPreviewPanel(PDDocument doc, int pageNum, Rectangle2D lastArea) throws IOException {
            this.document = doc;
            this.pageNumber = pageNum;
            this.lastSelection = lastArea;
            // æ¸²æŸ“PDF页面为图片
            PDFRenderer renderer = new PDFRenderer(doc);
            pageImage = renderer.renderImage(pageNumber);
            // èŽ·å–PDF页面尺寸
            pdfWidth = pageImage.getWidth();
            pdfHeight = pageImage.getHeight();
            // å¦‚果有上次选择的区域,默认选中
            if (lastSelection != null) {
                this.selection = lastSelection;
            }
            // æ·»åŠ é¼ æ ‡ç›‘å¬å™¨å¤„ç†é€‰æ‹©å’Œæ‹–æ‹½
            addMouseListener(new MouseAdapter() {
                @Override
                public void mousePressed(MouseEvent e) {
                    // æ£€æŸ¥ç‚¹å‡»ä½ç½®æ˜¯å¦åœ¨PDF页面上
                    Rectangle2D pageBounds = getPageBounds();
                    if (pageBounds.contains(e.getPoint())) {
                        // æ£€æŸ¥æ˜¯å¦ç‚¹å‡»åœ¨é€‰æ‹©åŒºåŸŸå†…
                        if (selection != null && selection.contains(e.getPoint())) {
                            startDrag = e.getPoint();
                            isDraggingPage = false;
                        } else {
                            // å¼€å§‹æ–°çš„选择
                            startDrag = e.getPoint();
                            selection = null;
                            isDraggingPage = false;
                        }
                    } else {
                        // ç‚¹å‡»åœ¨é¡µé¢å¤–,准备拖拽整个页面
                        lastDragPoint = e.getPoint();
                        isDraggingPage = true;
                    }
                    repaint();
                }
                @Override
                public void mouseReleased(MouseEvent e) {
                    startDrag = null;
                    lastDragPoint = null;
                    isDraggingPage = false;
                    repaint();
                }
            });
            addMouseMotionListener(new MouseMotionAdapter() {
                @Override
                public void mouseDragged(MouseEvent e) {
                    if (isDraggingPage && lastDragPoint != null) {
                        // æ‹–拽整个页面
                        int dx = e.getX() - lastDragPoint.x;
                        int dy = e.getY() - lastDragPoint.y;
                        translation.x += dx;
                        translation.y += dy;
                        lastDragPoint = e.getPoint();
                        repaint();
                    } else if (startDrag != null) {
                        // æ‹–拽选择区域
                        Point endDrag = e.getPoint();
                        selection = createRectangle(startDrag, endDrag);
                        repaint();
                    }
                }
            });
            // è®¾ç½®é¢æ¿é¦–选大小
            setPreferredSize(new Dimension(pageImage.getWidth(), pageImage.getHeight()));
        }
        // èŽ·å–PDF页面在面板中的边界
        private Rectangle2D getPageBounds() {
            return new Rectangle2D.Double(
                translation.getX(),
                translation.getY(),
                pageImage.getWidth() * scale,
                pageImage.getHeight() * scale
            );
        }
        private Rectangle2D createRectangle(Point p1, Point p2) {
            int x = Math.min(p1.x, p2.x);
            int y = Math.min(p1.y, p2.y);
            int width = Math.abs(p1.x - p2.x);
            int height = Math.abs(p1.y - p2.y);
            return new Rectangle2D.Double(x, y, width, height);
        }
        @Override
        protected void paintComponent(Graphics g) {
            super.paintComponent(g);
            Graphics2D g2 = (Graphics2D) g;
            g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g2.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
            // ç»˜åˆ¶èƒŒæ™¯
            g2.setColor(new Color(240, 240, 240));
            g2.fillRect(0, 0, getWidth(), getHeight());
            // ç»˜åˆ¶PDF页面
            if (pageImage != null) {
                // ä¿å­˜å½“前变换
                AffineTransform originalTransform = g2.getTransform();
                // åº”用平移和缩放
                g2.translate(translation.x, translation.y);
                g2.scale(scale, scale);
                // ç»˜åˆ¶PDF图像
                g2.drawImage(pageImage, 0, 0, null);
                // ç»˜åˆ¶ä¸Šæ¬¡é€‰æ‹©çš„区域(如果有)
                if (lastSelection != null && (selection == null || !selection.equals(lastSelection))) {
                    g2.setColor(LAST_SELECTION_COLOR);
                    g2.fill(lastSelection);
                    g2.setColor(new Color(76, 175, 80));
                    g2.setStroke(new BasicStroke(1));
                    g2.draw(lastSelection);
                }
                // ç»˜åˆ¶å½“前选择区域
                if (selection != null) {
                    // è½¬æ¢é€‰æ‹©åŒºåŸŸåæ ‡åˆ°PDF坐标系统
                    Rectangle2D pdfSelection = new Rectangle2D.Double(
                        selection.getX() - translation.x,
                        selection.getY() - translation.y,
                        selection.getWidth(),
                        selection.getHeight()
                    );
                    pdfSelection = new Rectangle2D.Double(
                        pdfSelection.getX() / scale,
                        pdfSelection.getY() / scale,
                        pdfSelection.getWidth() / scale,
                        pdfSelection.getHeight() / scale
                    );
                    g2.setColor(SELECTION_COLOR);
                    g2.fill(pdfSelection);
                    g2.setColor(Color.ORANGE);
                    g2.setStroke(new BasicStroke(2));
                    g2.draw(pdfSelection);
                }
                // æ¢å¤å˜æ¢
                g2.setTransform(originalTransform);
            }
        }
        public Rectangle2D getSelection() {
            return selection;
        }
        public float getScale() {
            return scale;
        }
        public Point getTranslation() {
            return translation;
        }
        public float getPdfHeight() {
            return pdfHeight;
        }
    }
src/main/java/com/xindao/ocr/swingui/swing/utils/FileNameValidator.java
对比新文件
@@ -0,0 +1,81 @@
package com.xindao.ocr.swingui.swing.utils;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
/**
 * Turns arbitrary text into a file name that follows the Windows naming rules: no
 * forbidden or control characters, no reserved device name as the base name, no trailing
 * period or space, and at most 255 characters.
 */
public class FileNameValidator {
    /** Longest name, in characters, that is ever returned. */
    private static final int MAX_LENGTH = 255;
    /** Used for the last character when {@code replacement} itself may not end a name. */
    private static final char SAFE_TAIL = '_';
    // Characters that are replaced wherever they occur in a name.
    private static final Set<Character> ILLEGAL_CHARACTERS;
    // Device names that may not be used as the part of a name before its first dot (lower case).
    private static final Set<String> RESERVED_NAMES;
    static {
        ILLEGAL_CHARACTERS = new HashSet<>(Arrays.asList(
            '/', '\'', '"', '\\', '*', ':', '?', '<', '>', '|'
        ));
        RESERVED_NAMES = new HashSet<>(Arrays.asList(
            "con", "prn", "aux", "nul",
            "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
            "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9"
        ));
    }

    /**
     * Validates and cleans a file name so that it follows the Windows naming rules.
     * <ul>
     *   <li>{@code null} or blank input yields {@code "unnamed"}.</li>
     *   <li>Leading and trailing whitespace is removed.</li>
     *   <li>Forbidden characters and control characters (below U+0020) are replaced.</li>
     *   <li>A reserved device name before the first dot (case-insensitive) gets
     *       {@code replacement} as a prefix.</li>
     *   <li>A name made only of dots and spaces gets the prefix {@code "file" + replacement}.</li>
     *   <li>The name is cut to 255 characters without splitting a surrogate pair.</li>
     *   <li>A final period or space is replaced, so the result never ends in either.</li>
     * </ul>
     *
     * @param fileName    original file name, may be {@code null}
     * @param replacement character substituted for forbidden characters; it is used as
     *                    given, so the caller must pass a character that is itself legal.
     *                    If it is {@code '.'} or {@code ' '}, a trailing period or space is
     *                    replaced with {@code '_'} instead
     * @return the cleaned file name, never {@code null} or empty
     */
    public static String validateAndCleanFileName(String fileName, char replacement) {
        if (fileName == null || fileName.trim().isEmpty()) {
            return "unnamed";
        }
        String trimmed = fileName.trim();

        StringBuilder sb = new StringBuilder(trimmed.length());
        for (int i = 0; i < trimmed.length(); i++) {
            char c = trimmed.charAt(i);
            sb.append(isIllegal(c) ? replacement : c);
        }
        String cleaned = sb.toString();

        // The reserved-name rule looks at everything before the FIRST dot ("nul.tar.gz" counts as "nul").
        int dotIndex = cleaned.indexOf('.');
        String nameWithoutExtension = dotIndex != -1 ? cleaned.substring(0, dotIndex) : cleaned;
        if (RESERVED_NAMES.contains(nameWithoutExtension.toLowerCase(java.util.Locale.ROOT))) {
            cleaned = replacement + cleaned;
        }

        // A name consisting only of dots and spaces needs a real stem.
        if (cleaned.replaceAll("[./\\\\ ]", "").isEmpty()) {
            cleaned = "file" + replacement + cleaned;
        }

        // Truncate before fixing the tail so that the cut itself cannot leave a trailing dot or space.
        if (cleaned.length() > MAX_LENGTH) {
            int end = MAX_LENGTH;
            if (Character.isHighSurrogate(cleaned.charAt(end - 1))) {
                end--; // do not keep half of a surrogate pair
            }
            cleaned = cleaned.substring(0, end);
        }

        // Replace a final period or space. Only the last character is replaced, and never
        // with '.' or ' ' again (replacing '.' with '.' would never terminate a retry loop).
        char last = cleaned.charAt(cleaned.length() - 1);
        if (last == '.' || last == ' ') {
            char tail = (replacement == '.' || replacement == ' ') ? SAFE_TAIL : replacement;
            cleaned = cleaned.substring(0, cleaned.length() - 1) + tail;
        }
        return cleaned;
    }

    /**
     * Same as {@link #validateAndCleanFileName(String, char)} with an underscore as the
     * replacement character.
     *
     * @param fileName original file name, may be {@code null}
     * @return the cleaned file name, never {@code null} or empty
     */
    public static String validateAndCleanFileName(String fileName) {
        return validateAndCleanFileName(fileName, '_');
    }

    /** Tells whether {@code c} must be replaced: a control character or a forbidden character. */
    private static boolean isIllegal(char c) {
        return c < 0x20 || ILLEGAL_CHARACTERS.contains(c);
    }
}
src/main/java/com/xindao/ocr/swingui/swing/utils/GenerateCustomizeComponent.java
对比新文件
@@ -0,0 +1,122 @@
package com.xindao.ocr.swingui.swing.utils;
import javax.swing.*;
import javax.swing.border.EmptyBorder;
import java.awt.*;
import java.awt.geom.RoundRectangle2D;
/**
 * Factory methods for the custom-painted Swing components used by the UI: a rounded
 * "card" panel, a transparent panel, and two flavours of rounded buttons.
 */
public class GenerateCustomizeComponent {
    private static final Color PRIMARY_LIGHT = new Color(100, 150, 255);
    private static final Color PRIMARY_COLOR = new Color(66, 133, 244);
    private static final Color TEXT_COLOR = new Color(51, 51, 51);
    private static final Color SECONDARY_COLOR = new Color(76, 175, 80);
    private static final Color CARD_COLOR = new Color(255, 255, 255);

    /**
     * Creates a non-opaque panel that paints itself as a white rounded rectangle with three
     * faint, nested outlines drawn on top.
     *
     * @return the new card panel
     */
    public static JPanel createCardPanel() {
        JPanel card = new JPanel() {
            @Override
            protected void paintComponent(Graphics g) {
                super.paintComponent(g);
                Graphics2D pen = (Graphics2D) g.create();
                pen.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);

                final int w = getWidth();
                final int h = getHeight();

                // Card body.
                pen.setColor(CARD_COLOR);
                pen.fill(new RoundRectangle2D.Double(0, 0, w, h, 10, 10));

                // Three nearly transparent outlines, each one pixel further in.
                pen.setColor(new Color(0, 0, 0, 10));
                int inset = 0;
                while (inset < 3) {
                    pen.draw(new RoundRectangle2D.Double(inset, inset, w - 2 * inset, h - 2 * inset, 10, 10));
                    inset++;
                }
                pen.dispose();
            }
        };
        card.setOpaque(false);
        card.setBackground(CARD_COLOR);
        return card;
    }

    /**
     * Creates a non-opaque panel that uses the given layout.
     *
     * @param layout layout manager for the panel
     * @return the new panel
     */
    public static JPanel createStyledPanel(LayoutManager layout) {
        JPanel transparent = new JPanel(layout);
        transparent.setOpaque(false);
        return transparent;
    }

    /**
     * Creates the emphasised button: white text on a vertical blue gradient that changes
     * with the pressed and rollover state, in a bold 14-point font.
     *
     * @param text         button label
     * @param DEFAULT_FONT font whose name is reused for the button's font
     * @return the new button
     */
    public static JButton createPrimaryButton(String text,Font DEFAULT_FONT) {
        JButton primary = new JButton(text) {
            @Override
            protected void paintComponent(Graphics g) {
                Graphics2D pen = (Graphics2D) g.create();
                pen.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);

                // Choose the gradient's top and bottom colours from the button state.
                ButtonModel state = getModel();
                Color top;
                Color bottom;
                if (state.isPressed()) {
                    top = PRIMARY_COLOR.darker();
                    bottom = PRIMARY_COLOR.darker().darker();
                } else if (state.isRollover()) {
                    top = PRIMARY_LIGHT;
                    bottom = PRIMARY_COLOR;
                } else {
                    top = PRIMARY_COLOR;
                    bottom = PRIMARY_LIGHT;
                }
                pen.setPaint(new GradientPaint(0, 0, top, 0, getHeight(), bottom));
                pen.fill(new RoundRectangle2D.Double(0, 0, getWidth(), getHeight(), 6, 6));

                // The label is painted by the default implementation, on top of the fill.
                super.paintComponent(g);
                pen.dispose();
            }
        };
        Font emphasised = new Font(DEFAULT_FONT.getName(), Font.BOLD, 14);
        return applyFlatButtonLook(primary, emphasised, Color.WHITE);
    }

    /**
     * Creates the neutral button: dark text on a light grey rounded background whose shade
     * changes with the pressed and rollover state.
     *
     * @param text button label
     * @param font font for the label
     * @return the new button
     */
    public static JButton createStyledButton(String text,Font font) {
        JButton neutral = new JButton(text) {
            @Override
            protected void paintComponent(Graphics g) {
                Graphics2D pen = (Graphics2D) g.create();
                pen.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);

                ButtonModel state = getModel();
                Color fill;
                if (state.isPressed()) {
                    fill = new Color(230, 230, 230);
                } else if (state.isRollover()) {
                    fill = new Color(240, 240, 240);
                } else {
                    fill = new Color(235, 235, 235);
                }
                pen.setColor(fill);
                pen.fill(new RoundRectangle2D.Double(0, 0, getWidth(), getHeight(), 6, 6));

                super.paintComponent(g);
                pen.dispose();
            }
        };
        return applyFlatButtonLook(neutral, font, TEXT_COLOR);
    }

    /**
     * Applies the settings both button flavours share: font, text colour, padding, no
     * default content fill, no focus ring, and a hand cursor.
     */
    private static JButton applyFlatButtonLook(JButton button, Font font, Color textColor) {
        button.setFont(font);
        button.setForeground(textColor);
        button.setBorder(new EmptyBorder(8, 15, 8, 15));
        button.setContentAreaFilled(false);
        button.setFocusPainted(false);
        button.setCursor(Cursor.getPredefinedCursor(Cursor.HAND_CURSOR));
        return button;
    }
}
src/main/java/com/xindao/ocr/swingui/swing/utils/ToFile.java
对比新文件
@@ -0,0 +1,179 @@
package com.xindao.ocr.swingui.swing.utils;
import org.bytedeco.opencv.global.opencv_imgcodecs;
import org.bytedeco.opencv.global.opencv_imgproc;
import org.bytedeco.opencv.opencv_core.Mat;
import org.bytedeco.opencv.opencv_core.Size;
import org.springframework.web.multipart.MultipartFile;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.nio.file.Files;
import java.util.Objects;
import java.util.Optional;
/**
 * @author sy
 * @date 2022/11/23 22:03
 */
public class ToFile {
    /**
     * MultipartFile转File
     * @param file
     * @return
     * @throws IOException
     */
    public static File multipartFiletoFile(MultipartFile file) throws IOException {
        File toFile = null;
        if((!file.equals("")) && (file.getSize() > 0)) {
            String filePath = "/tmp/img";
            if(!new File(filePath).exists()) {
                new File(filePath).mkdirs();
            }
            InputStream inputStream = file.getInputStream();
            String fileFullName = file.getOriginalFilename();
            String fileName = fileFullName.substring(0, fileFullName.lastIndexOf("."));
            String prefix = fileFullName.substring(fileFullName.lastIndexOf("."));
            toFile = new File(filePath + fileName + "_" + System.currentTimeMillis() + prefix);
            intputStreamToFile(inputStream, toFile);
            inputStream.close();
        }
        return toFile;
    }
    /**
     * èŽ·å–æ–‡ä»¶æµ
     * @param inputStream
     * @param file
     */
    private static void intputStreamToFile(InputStream inputStream, File file) {
        try (OutputStream outputStream = new FileOutputStream(file)) {
            int bytesRead = 0;
            byte[] buffer = new byte[8192];
            while((bytesRead = inputStream.read(buffer, 0, 8192)) != -1) {
                outputStream.write(buffer, 0, bytesRead);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    /**
     * åˆ é™¤ä¸´æ—¶æ–‡ä»¶
     * @param file
     */
    public static void deleteTempFile(File file) {
        if(Optional.ofNullable(file).isPresent()) {
            File del = new File(file.toURI());
            del.delete();
        }
    }
    /**
     * åˆ é™¤ä¸´æ—¶ç›®å½•下的所有文件
     * @param cacheDir
     */
    public static void deleteTempFiles(File cacheDir) {
        //删除临时目录
        if (cacheDir.exists() && cacheDir.isDirectory()) {
            for (File tempFile : Objects.requireNonNull(cacheDir.listFiles())) {
                if (tempFile.isFile()) {
                    try {
                        Files.delete(tempFile.toPath());
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }
    /**
     * ä¿å­˜å›¾åƒåˆ°æŒ‡å®šè·¯å¾„
     * @param image å›¾åƒå¯¹è±¡
     * @param filePath ä¿å­˜è·¯å¾„
     * @param formatName å›¾åƒæ ¼å¼ï¼ˆå¦‚ "png", "jpg")
     * @return æ˜¯å¦ä¿å­˜æˆåŠŸ
     */
    public static boolean saveImage(BufferedImage image, String filePath, String formatName) {
        if (image == null || filePath == null || formatName == null) {
            System.err.println("参数不能为空");
            return false;
        }
        try {
            // åˆ›å»ºæ–‡ä»¶å¯¹è±¡
            File outputFile = new File(filePath);
            // ç¡®ä¿çˆ¶ç›®å½•存在
            File parentDir = outputFile.getParentFile();
            if (parentDir != null && !parentDir.exists()) {
                parentDir.mkdirs(); // é€’归创建目录
            }
            // ä¿å­˜å›¾ç‰‡
            return ImageIO.write(image, formatName, outputFile);
        } catch (IOException e) {
            System.err.println("保存图片失败:" + e.getMessage());
            e.printStackTrace();
            return false;
        }
    }
    /**
     * é¢„处理图像以提高OCR识别率
     * @param inputPath è¾“入图像路径
     */
    public static void preprocessImage(String inputPath) {
        Mat src = opencv_imgcodecs.imread(inputPath);
        if (src.empty()) {
            System.err.println("无法读取图像: " + inputPath);
            return ;
        }
        // 1. ç°åº¦åŒ–
        Mat gray = new Mat();
        opencv_imgproc.cvtColor(src, gray, opencv_imgproc.COLOR_BGR2GRAY);
        // 2. è½»é‡åŽ»å™ªï¼ˆé¿å…è¿‡åº¦æ¨¡ç³ŠH的边缘)
//        Mat blurred = new Mat();
//        opencv_imgproc.GaussianBlur(gray, blurred, new Size(1, 1), 0); // ç¼©å°æ¨¡ç³Šæ ¸ï¼Œä¿ç•™å­—符细节
        // 3. å¯¹æ¯”度增强(改用CLAHE,更精细控制对比度)
        Mat enhanced = new Mat();
        opencv_imgproc.createCLAHE(3, new Size(3, 3)).apply(gray, enhanced); // clipLimit调整对比度强度
//        // 4. äºŒå€¼åŒ–(调整阈值参数,让H的轮廓更锐利)
//        Mat binary = new Mat();
//        opencv_imgproc.adaptiveThreshold(
//                enhanced,
//                binary,
//                255,
//                opencv_imgproc.ADAPTIVE_THRESH_GAUSSIAN_C,
//                opencv_imgproc.THRESH_BINARY_INV,
//                3,   // blockSize缩小,提升局部阈值精度
//                3     // C值调整,控制阈值偏移
//        );
//
//        // 5. å½¢æ€å­¦æ“ä½œï¼ˆè†¨èƒ€+腐蚀,让H的笔画更粗壮)
//        Mat kernel = opencv_imgproc.getStructuringElement(opencv_imgproc.MORPH_RECT, new Size(1, 1)); // å¢žå¤§æ ¸å°ºå¯¸
//        Mat morph = new Mat();
//        opencv_imgproc.dilate(binary, morph,kernel);   // å…ˆè†¨èƒ€ï¼ˆåŠ ç²—å­—ç¬¦ï¼‰
//        opencv_imgproc.erode(morph, morph,kernel);    // å†è…èš€ï¼ˆä¿®å¤è†¨èƒ€åŽçš„边缘,保持字符形状)
        // ä¿å­˜å¹¶é‡Šæ”¾èµ„源
        opencv_imgcodecs.imwrite(inputPath, enhanced);
        src.release();
        gray.release();
//        blurred.release();
        enhanced.release();
//        binary.release();
//        kernel.release();
//        morph.release();
    }
}
src/main/resources/PP_OCRv5/PP-LCNet_x0_25_textline_ori_infer/PP-LCNet_x0_25_textline_ori_infer.onnx
Binary files differ
src/main/resources/PP_OCRv5/PP-OCRv5_mobile_rec_infer/PP-OCRv5_mobile_rec_infer.onnx
Binary files differ
src/main/resources/PP_OCRv5/PP-OCRv5_mobile_rec_infer/dict.txt
对比新文件
文件太大
src/main/resources/PP_OCRv5/PP-OCRv5_server_det_infer/PP-OCRv5_server_det.onnx
Binary files differ
src/main/resources/PP_OCRv5/PP-OCRv5_server_rec_infer/PP-OCRv5_server_rec.onnx
Binary files differ
src/main/resources/PP_OCRv5/PP-OCRv5_server_rec_infer/dict.txt
对比新文件
文件太大
src/main/resources/application.yml
对比新文件
@@ -0,0 +1,2 @@
# server.port: TCP port the application's web server listens on
# (presumably the Spring Boot embedded server — confirm against the application class).
server:
  port: 8080
src/main/resources/logback-spring.xml
对比新文件
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration debug="false" scan="false">
    <property name="log.path" value="logs/${project.artifactId}"/>
    <!-- Colored log format -->
    <property name="CONSOLE_LOG_PATTERN"
              value="${CONSOLE_LOG_PATTERN:-%clr(%d{yyyy-MM-dd HH:mm:ss.SSS}){faint} %clr(${LOG_LEVEL_PATTERN:-%5p}) %clr(${PID:- }){magenta} %clr(---){faint} %clr([%15.15t]){faint} %clr(%-40.40logger{39}){cyan} %clr(:){faint} %m%n${LOG_EXCEPTION_CONVERSION_WORD:-%wEx}}"/>
    <!-- Converter classes required by the colored log format -->
    <conversionRule conversionWord="clr" converterClass="org.springframework.boot.logging.logback.ColorConverter"/>
    <conversionRule conversionWord="wex"
                    converterClass="org.springframework.boot.logging.logback.WhitespaceThrowableProxyConverter"/>
    <conversionRule conversionWord="wEx"
                    converterClass="org.springframework.boot.logging.logback.ExtendedWhitespaceThrowableProxyConverter"/>
    <!-- Console log output -->
    <appender name="console" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${CONSOLE_LOG_PATTERN}</pattern>
        </encoder>
    </appender>
    <!-- Log file debug output -->
    <appender name="debug" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <!-- Active log file; rolled-over archives are named by fileNamePattern below. -->
        <file>${log.path}/debug.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <!-- Archives are grouped in a per-month folder. That first %d is marked "aux" so the
                 yyyy-MM-dd token is the primary one that decides the rollover period.
                 %i is the index of the archive within a period; the .gz suffix enables compression. -->
            <fileNamePattern>${log.path}/%d{yyyy-MM, aux}/debug.%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern>
            <!-- Also roll over when the active file exceeds this size. -->
            <maxFileSize>50MB</maxFileSize>
            <!-- Number of rollover periods of archives to keep. -->
            <maxHistory>30</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>%date [%thread] %-5level [%logger{50}] %file:%line - %msg%n</pattern>
        </encoder>
    </appender>
    <!-- Log file error output -->
    <appender name="error" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${log.path}/error.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${log.path}/%d{yyyy-MM}/error.%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern>
            <maxFileSize>50MB</maxFileSize>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>%date [%thread] %-5level [%logger{50}] %file:%line - %msg%n</pattern>
        </encoder>
        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
            <level>ERROR</level>
        </filter>
    </appender>
    <logger name="org.activiti.engine.impl.db" level="DEBUG">
        <appender-ref ref="debug"/>
    </logger>
    <!--nacos å¿ƒè·³ INFO å±è”½-->
    <logger name="com.alibaba.nacos" level="OFF">
        <appender-ref ref="error"/>
    </logger>
    <!-- Level: FATAL 0  ERROR 3  WARN 4  INFO 6  DEBUG 7 -->
    <root level="INFO">
        <appender-ref ref="console"/>
        <appender-ref ref="debug"/>
    </root>
</configuration>