zjh
2024-12-03 06ef175a1e9f72b3863757319b2f6ff76c5a2f05
ltkj-admin/src/test/java/ImageTextExtractor.java
@@ -1,16 +1,24 @@
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
public class ImageTextExtractor {
    /**
     * Runs Tesseract OCR on the image file at {@code imagePath} and returns the recognized text.
     *
     * <p>NOTE(review): this span interleaves two revisions of the method. There are two
     * signatures (with and without {@code throws IOException}) and two {@code String result}
     * declarations, and a diff hunk marker sits inside the catch block, so some lines are not
     * visible here. Resolve to a single revision before compiling.
     *
     * @param imagePath path of the image file to run OCR on
     * @return the string returned by {@code doOCR}; what is returned after a
     *         {@code TesseractException} is not visible in this view
     * @throws IOException declared by the second signature; {@code ImageIO.read} is the call
     *         in this body that declares it
     */
    public static String extractTextFromImage(String imagePath) {
    public static String extractTextFromImage(String imagePath) throws IOException {
        File imageFile = new File(imagePath);
        // Engine used by the "eng" variant below.
        Tesseract tesseract = new Tesseract();
        // Engine used by the "chi_sim" variant below, held through the ITesseract interface.
        ITesseract instance = new Tesseract();  // JNA Interface Mapping
        try {
            // Variant 1: English language data, OCR run directly on the File.
            tesseract.setLanguage("eng");
            String result = tesseract.doOCR(imageFile);
            // Variant 2: explicit tessdata directory, "chi_sim" language data, OCR on a decoded image.
            // NOTE(review): the tessdata directory is a machine-specific absolute path.
            instance.setDatapath("D:\\develop\\OCR\\tessdata"); // set tessdata path
            instance.setLanguage("chi_sim"); // set recognition language
//            instance.setLanguage("eng"); // set recognition language
            // Passes the Tesseract variable user_defined_dpi with the value "300".
            instance.setTessVariable("user_defined_dpi","300");
            // NOTE(review): ImageIO.read returns null when no registered reader can decode the
            // file; that case is not checked before the image is handed to doOCR.
            BufferedImage image = ImageIO.read(imageFile);
            String result = instance.doOCR(image);
            return result;
        } catch (TesseractException e) {
            // OCR failure is only printed to stderr here.
            e.printStackTrace();
            // NOTE(review): the hunk marker below means the rest of this catch block is not shown.
@@ -18,9 +26,14 @@
        }
    }
    /**
     * Manual entry point: calls {@code extractTextFromImage} on a hard-coded image path and
     * prints the returned text to standard output, followed by a separator line.
     *
     * <p>NOTE(review): this span interleaves two revisions of the method. There are two
     * signatures (with and without {@code throws IOException}) and two {@code imagePath}
     * declarations pointing at different sample images. Resolve to a single revision before
     * compiling.
     *
     * @param args command-line arguments; not read by this method
     * @throws IOException declared by the second signature, matching the
     *         {@code throws IOException} form of {@code extractTextFromImage}
     */
    public static void main(String[] args) {
        // NOTE(review): both sample paths below are machine-specific absolute paths.
        String imagePath = "C:\\Users\\w\\Pictures\\Saved Pictures\\中华人民共和国万岁.jpg";
    public static void main(String[] args) throws IOException {
        String imagePath = "C:\\Users\\w\\Pictures\\Camera Roll\\a.png";
        String extractedText = extractTextFromImage(imagePath);
        System.out.println(extractedText);
        // Separator printed after the OCR output.
        System.out.println("============================================================");
        // Disabled run against a second sample image.
//        String imagePath1 = "C:\\Users\\w\\Pictures\\Saved Pictures\\b.png";
//        String extractedText1 = extractTextFromImage(imagePath1);
//        System.out.println(extractedText1);
    }
}
}