Upload tbiodeg AttentiveFP model

Browse files

Files changed (5) hide show

README.md +187 -0
config.json +22 -0
inference.py +127 -0
pytorch_model.pt +3 -0
requirements.txt +4 -0

README.md ADDED Viewed

	@@ -0,0 +1,187 @@

+---
+license: mit
+tags:
+- chemistry
+- molecular-property-prediction
+- graph-neural-networks
+- attentivefp
+- pytorch-geometric
+- toxicity-prediction
+language:
+- en
+pipeline_tag: tabular-regression
+---
+# Pyrosage tbiodeg AttentiveFP Model
+## Model Description
+This is an AttentiveFP (Attentive Fingerprint) graph neural network from the Pyrosage project, trained as a regression model for the tbiodeg endpoint. Given a SMILES string, it builds a molecular graph and predicts a single continuous value for the molecule.
+## Model Details
+- **Model Type**: AttentiveFP (Graph Neural Network)
+- **Task**: Regression
+- **Input**: SMILES strings (molecular representations)
+- **Output**: Continuous numerical value
+- **Framework**: PyTorch Geometric
+- **Architecture**: AttentiveFP with enhanced atom features (10 per atom) and bond features (6 per bond)
+### Hyperparameters
+```json
+{
+  "name": "baseline",
+  "hidden_channels": 64,
+  "num_layers": 2,
+  "num_timesteps": 2,
+  "dropout": 0.2,
+  "learning_rate": 0.001,
+  "weight_decay": 1e-05,
+  "batch_size": 32,
+  "epochs": 50,
+  "patience": 10
+}
+```
+## Usage
+### Installation
+```bash
+pip install torch torch-geometric rdkit-pypi
+```
+### Loading the Model
+```python
+import torch
+from torch_geometric.nn import AttentiveFP
+from rdkit import Chem
+from torch_geometric.data import Data
+# Load the checkpoint onto the CPU. The path is relative to the current working
+# directory. The checkpoint is a dict holding the trained weights under
+# 'model_state_dict' and the training configuration under 'hyperparameters'.
+model_dict = torch.load('pytorch_model.pt', map_location='cpu')
+state_dict = model_dict['model_state_dict']
+hyperparams = model_dict['hyperparameters']
+# Rebuild the network before loading the weights: 10 input features per atom,
+# 6 features per bond, one regression output; the remaining sizes come from
+# the hyperparameters stored in the checkpoint.
+model = AttentiveFP(
+    in_channels=10,  # Enhanced atom features
+    hidden_channels=hyperparams["hidden_channels"],
+    out_channels=1,
+    edge_dim=6,  # Enhanced bond features
+    num_layers=hyperparams["num_layers"],
+    num_timesteps=hyperparams["num_timesteps"],
+    dropout=hyperparams["dropout"],
+)
+model.load_state_dict(state_dict)
+# Put the model in evaluation mode before making predictions
+model.eval()
+```
+### Making Predictions
+```python
+def smiles_to_data(smiles):
+    """Convert SMILES string to PyG Data object"""
+    mol = Chem.MolFromSmiles(smiles)
+    if mol is None:
+        return None
+    # Enhanced atom features (10 dimensions)
+    atom_features = []
+    for atom in mol.GetAtoms():
+        features = [
+            atom.GetAtomicNum(),
+            atom.GetTotalDegree(),
+            atom.GetFormalCharge(),
+            atom.GetTotalNumHs(),
+            atom.GetNumRadicalElectrons(),
+            int(atom.GetIsAromatic()),
+            int(atom.IsInRing()),
+            # Hybridization as one-hot (3 dimensions)
+            int(atom.GetHybridization() == Chem.rdchem.HybridizationType.SP),
+            int(atom.GetHybridization() == Chem.rdchem.HybridizationType.SP2),
+            int(atom.GetHybridization() == Chem.rdchem.HybridizationType.SP3)
+        ]
+        atom_features.append(features)
+    x = torch.tensor(atom_features, dtype=torch.float)
+    # Enhanced bond features (6 dimensions)
+    edges_list = []
+    edge_features = []
+    for bond in mol.GetBonds():
+        i = bond.GetBeginAtomIdx()
+        j = bond.GetEndAtomIdx()
+        edges_list.extend([[i, j], [j, i]])
+        features = [
+            # Bond type as one-hot (4 dimensions)
+            int(bond.GetBondType() == Chem.rdchem.BondType.SINGLE),
+            int(bond.GetBondType() == Chem.rdchem.BondType.DOUBLE),
+            int(bond.GetBondType() == Chem.rdchem.BondType.TRIPLE),
+            int(bond.GetBondType() == Chem.rdchem.BondType.AROMATIC),
+            # Additional features (2 dimensions)
+            int(bond.GetIsConjugated()),
+            int(bond.IsInRing())
+        ]
+        edge_features.extend([features, features])
+    if not edges_list:
+        return None
+    edge_index = torch.tensor(edges_list, dtype=torch.long).t()
+    edge_attr = torch.tensor(edge_features, dtype=torch.float)
+    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
+def predict(model, smiles):
+    """Make prediction for a SMILES string"""
+    data = smiles_to_data(smiles)
+    if data is None:
+        return None
+    batch = torch.zeros(data.num_nodes, dtype=torch.long)
+    with torch.no_grad():
+        output = model(data.x, data.edge_index, data.edge_attr, batch)
+        return output.item()
+# Example usage: `prediction` is a Python float, or None when the SMILES
+# string could not be converted into a graph.
+smiles = "CC(=O)OC1=CC=CC=C1C(=O)O"  # Aspirin
+prediction = predict(model, smiles)
+print(f"Prediction for {smiles}: {prediction}")
+```
+## Training Data
+The model was trained on the tbiodeg dataset from the Pyrosage project, which focuses on molecular toxicity and environmental property prediction.
+## Model Performance
+See training logs for detailed performance metrics.
+## Limitations
+- The model is trained on specific chemical datasets and may not generalize to all molecular types
+- Performance may vary for molecules significantly different from the training distribution
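+- Requires a valid SMILES string as input; strings that RDKit cannot parse, and molecules without any bonds (e.g. a single atom), yield no prediction (`None`)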
+## Citation
+If you use this model, please cite the Pyrosage project:
+```bibtex
+@misc{pyrosagetbiodeg,
+  title={Pyrosage tbiodeg AttentiveFP Model},
+  author={UPCI NTUA},
+  year={2025},
+  publisher={Hugging Face},
+  url={https://huggingface.co/upci-ntua/pyrosage-tbiodeg-attentivefp}
+}
+```
+## License
+MIT License - see LICENSE file for details.

config.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "model_type": "AttentiveFP",
+  "task_type": "regression",
+  "endpoint": "tbiodeg",
+  "hyperparameters": {
+    "name": "baseline",
+    "hidden_channels": 64,
+    "num_layers": 2,
+    "num_timesteps": 2,
+    "dropout": 0.2,
+    "learning_rate": 0.001,
+    "weight_decay": 1e-05,
+    "batch_size": 32,
+    "epochs": 50,
+    "patience": 10
+  },
+  "input_features": {
+    "atom_features": 10,
+    "bond_features": 6
+  },
+  "framework": "pytorch_geometric"
+}

inference.py ADDED Viewed

	@@ -0,0 +1,127 @@

+#!/usr/bin/env python3
+"""
+Standalone inference script for Pyrosage tbiodeg AttentiveFP Model
+Usage: python inference.py "SMILES_STRING"
+"""
+import sys
+import torch
+from torch_geometric.nn import AttentiveFP
+from rdkit import Chem
+from torch_geometric.data import Data
+def smiles_to_data(smiles):
+    """Convert SMILES string to PyG Data object with enhanced features"""
+    mol = Chem.MolFromSmiles(smiles)
+    if mol is None:
+        return None
+    # Enhanced atom features (10 dimensions)
+    atom_features = []
+    for atom in mol.GetAtoms():
+        features = [
+            atom.GetAtomicNum(),
+            atom.GetTotalDegree(),
+            atom.GetFormalCharge(),
+            atom.GetTotalNumHs(),
+            atom.GetNumRadicalElectrons(),
+            int(atom.GetIsAromatic()),
+            int(atom.IsInRing()),
+            # Hybridization as one-hot (3 dimensions)
+            int(atom.GetHybridization() == Chem.rdchem.HybridizationType.SP),
+            int(atom.GetHybridization() == Chem.rdchem.HybridizationType.SP2),
+            int(atom.GetHybridization() == Chem.rdchem.HybridizationType.SP3)
+        ]
+        atom_features.append(features)
+    x = torch.tensor(atom_features, dtype=torch.float)
+    # Enhanced bond features (6 dimensions)
+    edges_list = []
+    edge_features = []
+    for bond in mol.GetBonds():
+        i = bond.GetBeginAtomIdx()
+        j = bond.GetEndAtomIdx()
+        edges_list.extend([[i, j], [j, i]])
+        features = [
+            # Bond type as one-hot (4 dimensions)
+            int(bond.GetBondType() == Chem.rdchem.BondType.SINGLE),
+            int(bond.GetBondType() == Chem.rdchem.BondType.DOUBLE),
+            int(bond.GetBondType() == Chem.rdchem.BondType.TRIPLE),
+            int(bond.GetBondType() == Chem.rdchem.BondType.AROMATIC),
+            # Additional features (2 dimensions)
+            int(bond.GetIsConjugated()),
+            int(bond.IsInRing())
+        ]
+        edge_features.extend([features, features])
+    if not edges_list:
+        return None
+    edge_index = torch.tensor(edges_list, dtype=torch.long).t()
+    edge_attr = torch.tensor(edge_features, dtype=torch.float)
+    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
+def load_model():
+    """Load the AttentiveFP model"""
+    model_dict = torch.load('pytorch_model.pt', map_location='cpu')
+    state_dict = model_dict['model_state_dict']
+    hyperparams = model_dict['hyperparameters']
+    model = AttentiveFP(
+        in_channels=10,  # Enhanced atom features
+        hidden_channels=hyperparams["hidden_channels"],
+        out_channels=1,
+        edge_dim=6,  # Enhanced bond features
+        num_layers=hyperparams["num_layers"],
+        num_timesteps=hyperparams["num_timesteps"],
+        dropout=hyperparams["dropout"],
+    )
+    model.load_state_dict(state_dict)
+    model.eval()
+    return model
+def predict(model, smiles):
+    """Make prediction for a SMILES string"""
+    data = smiles_to_data(smiles)
+    if data is None:
+        return None
+    batch = torch.zeros(data.num_nodes, dtype=torch.long)
+    with torch.no_grad():
+        output = model(data.x, data.edge_index, data.edge_attr, batch)
+        return output.item()
+def main():
+    if len(sys.argv) != 2:
+        print("Usage: python inference.py 'SMILES_STRING'")
+        print("Example: python inference.py 'CC(=O)OC1=CC=CC=C1C(=O)O'")
+        sys.exit(1)
+    smiles = sys.argv[1]
+    print(f"Loading tbiodeg AttentiveFP model...")
+    try:
+        model = load_model()
+        print(f"Making prediction for: {smiles}")
+        prediction = predict(model, smiles)
+        if prediction is not None:
+            print(f'Regression result: {prediction:.4f}')
+        else:
+            print("Error: Could not process SMILES string")
+    except Exception as e:
+        print(f"Error: {e}")
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

pytorch_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e422936990a4d3b3458585b80de6f7475e618120c922307f2c18314b58ae2d4
+size 383007

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch>=1.9.0
+torch-geometric>=2.0.0
+rdkit-pypi>=2022.3.0
+numpy>=1.21.0