단순 동기로 시작함
리멤버 앱 사용하는데 python으로 명함 인식 하는 프로그램 만드는게 가능하지 않을까로 시작
pyside6, opencv를 공부하면서 작성
결국 OpenAI가 다 만듦. 끝. (업그레이드 지속 예정)
주의: 프로젝트 폴더명이 한글이면 잘 안 되어서 중간에 영어 이름으로 변경함
https://github.com/carcase2/-Python_Mini_businesscard
import sys
import cv2
import pytesseract
from PySide6.QtWidgets import QApplication, QLabel, QVBoxLayout, QWidget, QPushButton, QFileDialog
from PySide6.QtGui import QImage, QPixmap
# Point pytesseract at the Tesseract executable. This is a Windows install
# location (presumably the installer's default -- adjust it if Tesseract
# lives elsewhere). With a wrong path pytesseract raises
# TesseractNotFoundError.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
def read_business_card(image_path, lang='kor'):
    """Run OCR on a business-card image and return the recognized text.

    Args:
        image_path: Path of the image file to read. Paths containing
            non-ASCII characters (for example a folder with a Korean name)
            are supported.
        lang: Tesseract language code(s) handed to pytesseract, e.g.
            ``'kor'`` or ``'kor+eng'``. The matching ``.traineddata`` file
            must be installed for Tesseract.

    Returns:
        The text recognized by Tesseract, or ``""`` when the image could
        not be loaded.
    """
    # Imported locally so this function stays self-contained; numpy is
    # already required by OpenCV's Python bindings.
    import numpy as np

    # cv2.imread is known to fail on Windows when the path contains
    # non-ASCII characters, so read the raw bytes with numpy and decode
    # them in memory instead.
    try:
        raw = np.fromfile(image_path, dtype=np.uint8)
    except OSError:
        raw = None

    image = None
    if raw is not None and raw.size > 0:
        # IMREAD_COLOR yields a 3-channel BGR image, like cv2.imread's default.
        image = cv2.imdecode(raw, cv2.IMREAD_COLOR)

    if image is None:
        print("Error: Could not load the image. Please check the file path.")
        return ""

    # Preprocess: grayscale, then Otsu binarization (threshold chosen
    # automatically, so the 0 passed below is ignored).
    # NOTE(review): THRESH_BINARY_INV inverts the image; Tesseract's docs
    # reportedly prefer dark text on a light background -- worth comparing
    # against THRESH_BINARY for recognition quality.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Extract text using OCR with the requested language.
    return pytesseract.image_to_string(threshold, lang=lang)
class BusinessCardReader(QWidget):
    """Window with an 'Open Image' button and a label showing the OCR text."""

    def __init__(self):
        """Build the button and result label and stack them vertically."""
        super().__init__()

        # Result area; word wrap keeps long OCR output inside the window.
        self.label = QLabel()
        self.label.setWordWrap(True)

        # Clicking the button opens the file picker.
        self.button = QPushButton('Open Image')
        self.button.clicked.connect(self.load_image)

        column = QVBoxLayout()
        for widget in (self.button, self.label):
            column.addWidget(widget)
        self.setLayout(column)

        self.setWindowTitle('Business Card Reader')

    def load_image(self):
        """Ask the user for an image file and display its recognized text.

        Nothing happens when the dialog is cancelled (empty path).
        """
        dialog_options = QFileDialog.Options()
        dialog_options |= QFileDialog.ReadOnly
        chosen_path, _selected_filter = QFileDialog.getOpenFileName(
            self,
            "Open Image",
            "",
            "Images (*.png *.xpm *.jpg *.bmp *.jpeg)",
            options=dialog_options,
        )
        if not chosen_path:
            return
        self.label.setText(read_business_card(chosen_path))
def main():
    """Create the Qt application, show the reader window, and run the event loop."""
    qt_app = QApplication(sys.argv)

    reader = BusinessCardReader()
    reader.show()

    # Hand Qt's exit status back to the operating system.
    exit_code = qt_app.exec()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
결과
과정
1-1. 시작
1-2. 결과
import sys
import cv2
import pytesseract
from PySide6.QtWidgets import QApplication, QLabel, QVBoxLayout, QWidget
from PySide6.QtGui import QImage, QPixmap
# Placeholder path: replace it with the real location of the Tesseract
# executable on this machine, otherwise pytesseract raises
# TesseractNotFoundError.
pytesseract.pytesseract.tesseract_cmd = r'/path/to/tesseract.exe'
def read_business_card(image_path):
    """Run OCR on a business-card image and return the recognized text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The text recognized by Tesseract, or ``""`` when the image could
        not be loaded.
    """
    # Load the image
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising;
    # without this guard cvtColor below would fail with an opaque cv2.error.
    if image is None:
        print("Error: Could not load the image. Please check the file path.")
        return ""

    # Preprocess the image: grayscale, then inverted Otsu binarization
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Extract text using OCR (no language is passed, so Tesseract's default
    # language applies)
    text = pytesseract.image_to_string(threshold)
    return text
def main():
    """Run OCR on a fixed image path and show the result in a Qt window."""
    app = QApplication(sys.argv)

    # Placeholder: point this at a real business-card image.
    card_path = 'path/to/business_card.jpg'
    recognized = read_business_card(card_path)

    # Label holding the extracted text; wrapping keeps long lines readable.
    result_label = QLabel()
    result_label.setWordWrap(True)
    result_label.setText(recognized)

    container = QWidget()
    container.setWindowTitle('Business Card Reader')
    box = QVBoxLayout()
    box.addWidget(result_label)
    container.setLayout(box)
    container.show()

    sys.exit(app.exec())


if __name__ == '__main__':
    main()
2-1 image_path를 어떻게 넣지를 몰라서 다시 물어봄
2-2 이미지를 불러오는 단추를 만들어서 불러오고 싶음
import sys
import cv2
import pytesseract
from PySide6.QtWidgets import QApplication, QLabel, QVBoxLayout, QWidget, QPushButton, QFileDialog
from PySide6.QtGui import QImage, QPixmap
# Placeholder path: replace it with the real location of the Tesseract
# executable on this machine, otherwise pytesseract raises
# TesseractNotFoundError.
pytesseract.pytesseract.tesseract_cmd = r'/path/to/tesseract.exe'
def read_business_card(image_path):
    """Run OCR on a business-card image and return the recognized text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The text recognized by Tesseract, or ``""`` when the image could
        not be loaded.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None for a missing or unreadable file; bail out
    # early so the GUI shows an empty result instead of crashing in cvtColor.
    if image is None:
        print("Error: Could not load the image. Please check the file path.")
        return ""

    # Grayscale first, then let Otsu's method pick the binarization threshold.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # No language is passed, so Tesseract's default language applies.
    text = pytesseract.image_to_string(threshold)
    return text
class BusinessCardReader(QWidget):
    """Main widget: a file-picker button above a label with the OCR result."""

    def __init__(self):
        """Create the child widgets and arrange them in a vertical layout."""
        super().__init__()

        # Label that receives the recognized text (wrapped to the window width).
        self.label = QLabel()
        self.label.setWordWrap(True)

        # Button that triggers load_image when clicked.
        self.button = QPushButton('Open Image')
        self.button.clicked.connect(self.load_image)

        stack = QVBoxLayout()
        for child in (self.button, self.label):
            stack.addWidget(child)
        self.setLayout(stack)

        self.setWindowTitle('Business Card Reader')

    def load_image(self):
        """Open a file dialog and show the OCR text of the selected image.

        Returns without changing the label when no file is selected.
        """
        picker_options = QFileDialog.Options()
        picker_options |= QFileDialog.ReadOnly
        selected_path, _filter_used = QFileDialog.getOpenFileName(
            self,
            "Open Image",
            "",
            "Images (*.png *.xpm *.jpg *.bmp *.jpeg)",
            options=picker_options,
        )
        if not selected_path:
            return
        self.label.setText(read_business_card(selected_path))
def main():
    """Start the Qt application and display the business-card reader window."""
    application = QApplication(sys.argv)

    main_window = BusinessCardReader()
    main_window.show()

    # Block in the Qt event loop, then exit with its status code.
    status = application.exec()
    sys.exit(status)


if __name__ == '__main__':
    main()
3-1 예외가 발생했습니다. (ModuleNotFoundError: No module named 'cv2' 발생함)
- cv2가 설치가 안 되어서 발생 (pip install opencv-python 으로 설치하면 해결)
4-1 pytesseract.pytesseract.TesseractNotFoundError: /path/to/tesseract.exe is not installed or it's not in your PATH. See README file for more information.
- tesseract 설치했는데 왜 안 되지?
'/path/to/tesseract.exe' 라인을 그대로 적어서 문제가 된 것. 실제 tesseract가 설치된 경로를 넣어줘야 하는 거였음
5-1 한글이 인식이 안되어서 문제가됨
- 한글 학습 데이터(kor.traineddata)가 없어서 문제가 됨 (설치하면 끝)
https://github.com/tesseract-ocr/tessdata_best/blob/main/kor.traineddata
def read_business_card(image_path, lang='kor'):
    """Run OCR on a business-card image and return the recognized text.

    Args:
        image_path: Path of the image file to read. Paths containing
            non-ASCII characters (for example a folder with a Korean name)
            are supported.
        lang: Tesseract language code(s) handed to pytesseract, e.g.
            ``'kor'`` or ``'kor+eng'``. The matching ``.traineddata`` file
            must be installed for Tesseract.

    Returns:
        The text recognized by Tesseract, or ``""`` when the image could
        not be loaded.
    """
    # Imported locally so this snippet stays self-contained; numpy is
    # already required by OpenCV's Python bindings.
    import numpy as np

    # cv2.imread is known to fail on Windows when the path contains
    # non-ASCII characters, so read the raw bytes with numpy and decode
    # them in memory instead.
    try:
        raw = np.fromfile(image_path, dtype=np.uint8)
    except OSError:
        raw = None

    image = None
    if raw is not None and raw.size > 0:
        # IMREAD_COLOR yields a 3-channel BGR image, like cv2.imread's default.
        image = cv2.imdecode(raw, cv2.IMREAD_COLOR)

    # Check if the image is loaded successfully
    if image is None:
        print("Error: Could not load the image. Please check the file path.")
        return ""

    # Preprocess the image: grayscale, then Otsu binarization (threshold
    # chosen automatically, so the 0 passed below is ignored).
    # NOTE(review): THRESH_BINARY_INV inverts the image; Tesseract's docs
    # reportedly prefer dark text on a light background -- worth comparing
    # against THRESH_BINARY for recognition quality.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Extract text using OCR with the requested language (Korean by default)
    return pytesseract.image_to_string(threshold, lang=lang)
이미지 처리하는 법을 더 공부해서 명함 인식 확률을 높여 봐야겠다.
'Study(매일매일한걸음씩) > Python' 카테고리의 다른 글
Mini -Project #3-3 명함 인식 프로그램 만들기(pyside6,openai,opencv) mongodb 에 저장하기(기본) (0) | 2023.04.13 |
---|---|
Mini -Project #3-2 명함 인식 프로그램 만들기(pyside6,openai,opencv) 전화번호 인식/이메일주소 인식 (0) | 2023.04.04 |
Mini -Project #2-3 GUI 계산기 만들기(pyside6) - 완료(추가설명) (0) | 2023.03.09 |
Mini -Project #2-3 GUI 계산기 만들기(pyside6) - 완료 (0) | 2023.02.13 |
[Study Python] dictionary (0) | 2023.02.11 |
댓글