ai
  • Crypto News
  • Ai
  • eSports
  • Bitcoin
  • Ethereum
  • Blockchain
Home»Ai»Building an Advanced Convolutional Neural Network with Attention for DNA Sequence Classification and Interpretability
Ai

Building an Advanced Convolutional Neural Network with Attention for DNA Sequence Classification and Interpretability

Share
Facebook Twitter LinkedIn Pinterest Email
class DNASequenceClassifier:
   def __init__(self, sequence_length=200, num_classes=2):
       self.sequence_length = sequence_length
       self.num_classes = num_classes
       self.model = None
       self.history = None
      
   def one_hot_encode(self, sequences):
       mapping = {'A': 0, 'T': 1, 'G': 2, 'C': 3}
       encoded = np.zeros((len(sequences), self.sequence_length, 4))
      
       for i, seq in enumerate(sequences):
           for j, nucleotide in enumerate(seq[:self.sequence_length]):
               if nucleotide in mapping:
                   encoded[i, j, mapping[nucleotide]] = 1
       return encoded
  
   def attention_layer(self, inputs, name="attention"):
       attention_weights = layers.Dense(1, activation='tanh', name=f"{name}_weights")(inputs)
       attention_weights = layers.Flatten()(attention_weights)
       attention_weights = layers.Activation('softmax', name=f"{name}_softmax")(attention_weights)
       attention_weights = layers.RepeatVector(inputs.shape[-1])(attention_weights)
       attention_weights = layers.Permute([2, 1])(attention_weights)
      
       attended = layers.Multiply(name=f"{name}_multiply")([inputs, attention_weights])
       return layers.GlobalMaxPooling1D()(attended)
  
   def build_model(self):
       inputs = layers.Input(shape=(self.sequence_length, 4), name="dna_input")
      
       conv_layers = []
       filter_sizes = [3, 7, 15, 25]
      
       for i, filter_size in enumerate(filter_sizes):
           conv = layers.Conv1D(
               filters=64,
               kernel_size=filter_size,
               activation='relu',
               padding='same',
               name=f"conv_{filter_size}"
           )(inputs)
           conv = layers.BatchNormalization(name=f"bn_conv_{filter_size}")(conv)
           conv = layers.Dropout(0.2, name=f"dropout_conv_{filter_size}")(conv)
          
           attended = self.attention_layer(conv, name=f"attention_{filter_size}")
           conv_layers.append(attended)
      
       if len(conv_layers) > 1:
           merged = layers.Concatenate(name="concat_multiscale")(conv_layers)
       else:
           merged = conv_layers[0]
      
       dense = layers.Dense(256, activation='relu', name="dense_1")(merged)
       dense = layers.BatchNormalization(name="bn_dense_1")(dense)
       dense = layers.Dropout(0.5, name="dropout_dense_1")(dense)
      
       dense = layers.Dense(128, activation='relu', name="dense_2")(dense)
       dense = layers.BatchNormalization(name="bn_dense_2")(dense)
       dense = layers.Dropout(0.3, name="dropout_dense_2")(dense)
      
       if self.num_classes == 2:
           outputs = layers.Dense(1, activation='sigmoid', name="output")(dense)
           loss="binary_crossentropy"
           metrics = ['accuracy', 'precision', 'recall']
       else:
           outputs = layers.Dense(self.num_classes, activation='softmax', name="output")(dense)
           loss="categorical_crossentropy"
           metrics = ['accuracy']
      
       self.model = keras.Model(inputs=inputs, outputs=outputs, name="DNA_CNN_Classifier")
      
       optimizer = keras.optimizers.Adam(
           learning_rate=0.001,
           beta_1=0.9,
           beta_2=0.999,
           epsilon=1e-7
       )
      
       self.model.compile(
           optimizer=optimizer,
           loss=loss,
           metrics=metrics
       )
      
       return self.model
  
   def generate_synthetic_data(self, n_samples=10000):
       sequences = []
       labels = []
      
       positive_motifs = ['TATAAA', 'CAAT', 'GGGCGG', 'TTGACA']
       negative_motifs = ['AAAAAAA', 'TTTTTTT', 'CCCCCCC', 'GGGGGGG']
      
       nucleotides = ['A', 'T', 'G', 'C']
      
       for i in range(n_samples):
           sequence="".join(random.choices(nucleotides, k=self.sequence_length))
          
           if i  0.5).astype(int).flatten()
      
       print("Classification Report:")
       print(classification_report(y_test, y_pred))
      
       fig, axes = plt.subplots(2, 2, figsize=(15, 10))
      
       axes[0,0].plot(self.history.history['loss'], label="Training Loss")
       axes[0,0].plot(self.history.history['val_loss'], label="Validation Loss")
       axes[0,0].set_title('Training History - Loss')
       axes[0,0].set_xlabel('Epoch')
       axes[0,0].set_ylabel('Loss')
       axes[0,0].legend()
      
       axes[0,1].plot(self.history.history['accuracy'], label="Training Accuracy")
       axes[0,1].plot(self.history.history['val_accuracy'], label="Validation Accuracy")
       axes[0,1].set_title('Training History - Accuracy')
       axes[0,1].set_xlabel('Epoch')
       axes[0,1].set_ylabel('Accuracy')
       axes[0,1].legend()
      
       cm = confusion_matrix(y_test, y_pred)
       sns.heatmap(cm, annot=True, fmt="d", ax=axes[1,0], cmap='Blues')
       axes[1,0].set_title('Confusion Matrix')
       axes[1,0].set_ylabel('Actual')
       axes[1,0].set_xlabel('Predicted')
      
       axes[1,1].hist(y_pred_proba[y_test==0], bins=50, alpha=0.7, label="Negative", density=True)
       axes[1,1].hist(y_pred_proba[y_test==1], bins=50, alpha=0.7, label="Positive", density=True)
       axes[1,1].set_title('Prediction Score Distribution')
       axes[1,1].set_xlabel('Prediction Score')
       axes[1,1].set_ylabel('Density')
       axes[1,1].legend()
      
       plt.tight_layout()
       plt.show()
      
       return y_pred, y_pred_proba
Share. Facebook Twitter Pinterest LinkedIn Tumblr Email

Related Posts

How Can We Build Scalable and Reproducible Machine Learning Experiment Pipelines Using Meta Research Hydra?

novembre 5, 2025

The Download: the AGI myth, and US/China AI competition

novembre 4, 2025

Why the for-profit race into solar geoengineering is bad for science and public trust

novembre 4, 2025

3 Questions: How AI is helping us monitor and support vulnerable ecosystems | MIT News

novembre 4, 2025
Add A Comment

Comments are closed.

Top Posts

SwissCryptoDaily.ch delivers the latest cryptocurrency news, market insights, and expert analysis. Stay informed with daily updates from the world of blockchain and digital assets.

We're social. Connect with us:

Facebook X (Twitter) Instagram Pinterest YouTube
Top Insights

Ethereum ETF outflows deepens as ETH struggles around $3,300

novembre 5, 2025

NFT Market Cap Drops 46% in 30 Days as Blue-Chip Prices Plunge

novembre 5, 2025

White House Defends Trump’s Pardon of Binance founder CZ

novembre 5, 2025
Get Informed

Subscribe to Updates

Get the latest creative news from FooBar about art, design and business.

Facebook X (Twitter) Instagram Pinterest
  • About us
  • Get In Touch
  • Cookies Policy
  • Privacy-Policy
  • Terms and Conditions
© 2025 Swisscryptodaily.ch.

Type above and press Enter to search. Press Esc to cancel.