ai
  • Crypto News
  • Ai
  • eSports
  • Bitcoin
  • Ethereum
  • Blockchain
Home»Ai»How to Build a Multilingual OCR AI Agent in Python with EasyOCR and OpenCV
Ai

How to Build a Multilingual OCR AI Agent in Python with EasyOCR and OpenCV

Share
Facebook Twitter LinkedIn Pinterest Email
class AdvancedOCRAgent:
   """
   Advanced OCR AI Agent with preprocessing, multi-language support,
   and intelligent text extraction capabilities.
   """
  
   def __init__(self, languages: List[str] = ['en'], gpu: bool = True):
       """Initialize OCR agent with specified languages."""
       print("🤖 Initializing Advanced OCR Agent...")
       self.languages = languages
       self.reader = easyocr.Reader(languages, gpu=gpu)
       self.confidence_threshold = 0.5
       print(f"✅ OCR Agent ready! Languages: {languages}")
  
   def upload_image(self) -> Optional[str]:
       """Upload image file through Colab interface."""
       print("📁 Upload your image file:")
       uploaded = files.upload()
       if uploaded:
           filename = list(uploaded.keys())[0]
           print(f"✅ Uploaded: {filename}")
           return filename
       return None
  
   def preprocess_image(self, image: np.ndarray, enhance: bool = True) -> np.ndarray:
       """Advanced image preprocessing for better OCR accuracy."""
       if len(image.shape) == 3:
           gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
       else:
           gray = image.copy()
      
       if enhance:
           clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
           gray = clahe.apply(gray)
          
           gray = cv2.fastNlMeansDenoising(gray)
          
           kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
           gray = cv2.filter2D(gray, -1, kernel)
      
       binary = cv2.adaptiveThreshold(
           gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
       )
      
       return binary
  
   def extract_text(self, image_path: str, preprocess: bool = True) -> Dict:
       """Extract text from image with advanced processing."""
       print(f"🔍 Processing image: {image_path}")
      
       image = cv2.imread(image_path)
       if image is None:
           raise ValueError(f"Could not load image: {image_path}")
      
       if preprocess:
           processed_image = self.preprocess_image(image)
       else:
           processed_image = image
      
       results = self.reader.readtext(processed_image)
      
       extracted_data = {
           'raw_results': results,
           'filtered_results': [],
           'full_text': '',
           'confidence_stats': {},
           'word_count': 0,
           'line_count': 0
       }
      
       high_confidence_text = []
       confidences = []
      
       for (bbox, text, confidence) in results:
           if confidence >= self.confidence_threshold:
               extracted_data['filtered_results'].append({
                   'text': text,
                   'confidence': confidence,
                   'bbox': bbox
               })
               high_confidence_text.append(text)
               confidences.append(confidence)
      
       extracted_data['full_text'] = ' '.join(high_confidence_text)
       extracted_data['word_count'] = len(extracted_data['full_text'].split())
       extracted_data['line_count'] = len(high_confidence_text)
      
       if confidences:
           extracted_data['confidence_stats'] = {
               'mean': np.mean(confidences),
               'min': np.min(confidences),
               'max': np.max(confidences),
               'std': np.std(confidences)
           }
      
       return extracted_data
  
   def visualize_results(self, image_path: str, results: Dict, show_bbox: bool = True):
       """Visualize OCR results with bounding boxes."""
       image = cv2.imread(image_path)
       image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      
       plt.figure(figsize=(15, 10))
      
       if show_bbox:
           plt.subplot(2, 2, 1)
           img_with_boxes = image_rgb.copy()
          
           for item in results['filtered_results']:
               bbox = np.array(item['bbox']).astype(int)
               cv2.polylines(img_with_boxes, [bbox], True, (255, 0, 0), 2)
              
               x, y = bbox[0]
               cv2.putText(img_with_boxes, f"{item['confidence']:.2f}",
                          (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
          
           plt.imshow(img_with_boxes)
           plt.title("OCR Results with Bounding Boxes")
           plt.axis('off')
      
       plt.subplot(2, 2, 2)
       processed = self.preprocess_image(image)
       plt.imshow(processed, cmap='gray')
       plt.title("Preprocessed Image")
       plt.axis('off')
      
       plt.subplot(2, 2, 3)
       confidences = [item['confidence'] for item in results['filtered_results']]
       if confidences:
           plt.hist(confidences, bins=20, alpha=0.7, color="blue")
           plt.xlabel('Confidence Score')
           plt.ylabel('Frequency')
           plt.title('Confidence Score Distribution')
           plt.axvline(self.confidence_threshold, color="red", linestyle="--",
                      label=f'Threshold: {self.confidence_threshold}')
           plt.legend()
      
       plt.subplot(2, 2, 4)
       stats = results['confidence_stats']
       if stats:
           labels = ['Mean', 'Min', 'Max']
           values = [stats['mean'], stats['min'], stats['max']]
           plt.bar(labels, values, color=['green', 'red', 'blue'])
           plt.ylabel('Confidence Score')
           plt.title('Confidence Statistics')
           plt.ylim(0, 1)
      
       plt.tight_layout()
       plt.show()
  
   def smart_text_analysis(self, text: str) -> Dict:
       """Perform intelligent analysis of extracted text."""
       analysis = {
           'language_detection': 'unknown',
           'text_type': 'unknown',
           'key_info': {},
           'patterns': []
       }
      
       email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
       phone_pattern = r'(\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
       url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
       date_pattern = r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b'
      
       patterns = {
           'emails': re.findall(email_pattern, text, re.IGNORECASE),
           'phones': re.findall(phone_pattern, text),
           'urls': re.findall(url_pattern, text, re.IGNORECASE),
           'dates': re.findall(date_pattern, text)
       }
      
       analysis['patterns'] = {k: v for k, v in patterns.items() if v}
      
       if any(patterns.values()):
           if patterns.get('emails') or patterns.get('phones'):
               analysis['text_type'] = 'contact_info'
           elif patterns.get('urls'):
               analysis['text_type'] = 'web_content'
           elif patterns.get('dates'):
               analysis['text_type'] = 'document_with_dates'
      
       if re.search(r'[а-яё]', text.lower()):
           analysis['language_detection'] = 'russian'
       elif re.search(r'[àáâãäåæçèéêëìíîïñòóôõöøùúûüý]', text.lower()):
           analysis['language_detection'] = 'romance_language'
       elif re.search(r'[一-龯]', text):
           analysis['language_detection'] = 'chinese'
       elif re.search(r'[ひらがなカタカナ]', text):
           analysis['language_detection'] = 'japanese'
       elif re.search(r'[a-zA-Z]', text):
           analysis['language_detection'] = 'latin_based'
      
       return analysis
  
   def process_batch(self, image_folder: str) -> List[Dict]:
       """Process multiple images in batch."""
       results = []
       supported_formats = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
      
       for filename in os.listdir(image_folder):
           if filename.lower().endswith(supported_formats):
               image_path = os.path.join(image_folder, filename)
               try:
                   result = self.extract_text(image_path)
                   result['filename'] = filename
                   results.append(result)
                   print(f"✅ Processed: {filename}")
               except Exception as e:
                   print(f"❌ Error processing {filename}: {str(e)}")
      
       return results
  
   def export_results(self, results: Dict, format: str="json") -> str:
       """Export results in specified format."""
       if format.lower() == 'json':
           output = json.dumps(results, indent=2, ensure_ascii=False)
           filename="ocr_results.json"
       elif format.lower() == 'txt':
           output = results['full_text']
           filename="extracted_text.txt"
       else:
           raise ValueError("Supported formats: 'json', 'txt'")
      
       with open(filename, 'w', encoding='utf-8') as f:
           f.write(output)
      
       print(f"📄 Results exported to: {filename}")
       return filename
Share. Facebook Twitter Pinterest LinkedIn Tumblr Email

Related Posts

How Can We Build Scalable and Reproducible Machine Learning Experiment Pipelines Using Meta Research Hydra?

novembre 5, 2025

The Download: the AGI myth, and US/China AI competition

novembre 4, 2025

Why the for-profit race into solar geoengineering is bad for science and public trust

novembre 4, 2025

3 Questions: How AI is helping us monitor and support vulnerable ecosystems | MIT News

novembre 4, 2025
Add A Comment

Comments are closed.

Top Posts

SwissCryptoDaily.ch delivers the latest cryptocurrency news, market insights, and expert analysis. Stay informed with daily updates from the world of blockchain and digital assets.

We're social. Connect with us:

Facebook X (Twitter) Instagram Pinterest YouTube
Top Insights

Wintermute CEO Denies Binance Lawsuit Rumors

novembre 5, 2025

Ethereum ETF outflows deepens as ETH struggles around $3,300

novembre 5, 2025

NFT Market Cap Drops 46% in 30 Days as Blue-Chip Prices Plunge

novembre 5, 2025
Get Informed

Subscribe to Updates

Get the latest creative news from FooBar about art, design and business.

Facebook X (Twitter) Instagram Pinterest
  • About us
  • Get In Touch
  • Cookies Policy
  • Privacy-Policy
  • Terms and Conditions
© 2025 Swisscryptodaily.ch.

Type above and press Enter to search. Press Esc to cancel.