基本信息
源码名称:JAVA OCR
源码大小:1.67M
文件格式:.rar
开发语言:Java
更新时间:2013-07-09
友情提示:(无需注册或充值,赞助后即可获取资源下载链接)
嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300
本次赞助数额为:20 元
微信扫码支付:20 元
×
请留下您的邮箱,我们将在2小时内将文件发到您的邮箱
源码介绍
package com.ocr; import java.io.BufferedReader; import java.util.*; import java.io.*; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; import org.jdesktop.swingx.util.OS; public class OCR { private final String LANG_OPTION = "-l"; private final String EOL = System.getProperty("line.separator"); private String tessPath = new File("Tesseract-OCR").getAbsolutePath(); //private String tessPath="C:\\Program Files (x86)\\Tesseract-OCR\\"; public String recognizeText(File imageFile, String imageFormat) throws Exception { System.out.println("in OCR.java recognizeText 47 row tessPath=" tessPath); File tempImage = ImageIOHelper.createImage(imageFile, imageFormat); File outputFile = new File(imageFile.getParentFile(), "output"); StringBuffer strB = new StringBuffer(); List<String> cmd = new ArrayList<String>(); if (OS.isWindowsXP()) { cmd.add(tessPath "\\tesseract"); //cmd.add(tessPath "\\Tesseract-OCR"); } else if (OS.isLinux()) { cmd.add("tesseract"); } else { //cmd.add(tessPath "\\Tesseract-OCR"); cmd.add(tessPath "\\tesseract"); } cmd.add(""); cmd.add(outputFile.getName()); cmd.add(LANG_OPTION); cmd.add("chi_sim"); cmd.add("eng"); ProcessBuilder pb = new ProcessBuilder(); pb.directory(imageFile.getParentFile()); cmd.set(1, tempImage.getName()); pb.command(cmd); pb.redirectErrorStream(true); Process process = pb.start(); //tesseract.exe 1.jpg 1 -l chi_sim int w = process.waitFor(); // delete temp working files tempImage.delete(); if (w == 0) { BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile .getAbsolutePath() ".txt"), "UTF-8")); String str; while ((str = in.readLine()) != null) { strB.append(str).append(EOL); } in.close(); } else { String msg; switch (w) { case 1: msg = "Errors accessing files. 
There may be spaces in your image's filename."; break; case 29: msg = "Cannot recognize the image or its selected region."; break; case 31: msg = "Unsupported image format."; break; default: msg = "Errors occurred."; } tempImage.delete(); throw new RuntimeException(msg); } new File(outputFile.getAbsolutePath() ".txt").delete(); return strB.toString(); } }