import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

| | | public class ImageTextExtractor { |
| | | |
| | | public static String extractTextFromImage(String imagePath) { |
| | | public static String extractTextFromImage(String imagePath) throws IOException { |
| | | File imageFile = new File(imagePath); |
| | | Tesseract tesseract = new Tesseract(); |
| | | ITesseract instance = new Tesseract(); // JNA Interface Mapping |
| | | try { |
| | | tesseract.setLanguage("eng"); |
| | | String result = tesseract.doOCR(imageFile); |
| | | instance.setDatapath("D:\\develop\\OCR\\tessdata"); // set tessdata path |
| | | instance.setLanguage("chi_sim"); // set recognition language |
| | | // instance.setLanguage("eng"); // set recognition language |
| | | instance.setTessVariable("user_defined_dpi","300"); |
| | | BufferedImage image = ImageIO.read(imageFile); |
| | | String result = instance.doOCR(image); |
| | | return result; |
| | | } catch (TesseractException e) { |
| | | e.printStackTrace(); |
| | |
| | | } |
| | | } |
| | | |
| | | public static void main(String[] args) { |
| | | String imagePath = "C:\\Users\\w\\Pictures\\Saved Pictures\\中华人民共和国万岁.jpg"; |
| | | public static void main(String[] args) throws IOException { |
| | | String imagePath = "C:\\Users\\w\\Pictures\\Saved Pictures\\a.png"; |
| | | String extractedText = extractTextFromImage(imagePath); |
| | | System.out.println(extractedText); |
| | | System.out.println("============================================================"); |
| | | String imagePath1 = "C:\\Users\\w\\Pictures\\Saved Pictures\\b.png"; |
| | | String extractedText1 = extractTextFromImage(imagePath1); |
| | | System.out.println(extractedText1); |
| | | } |
| | | } |
| | | } |
| | | |