# File size: 4,808 Bytes
# 5e90249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import numpy as np
import torch
import os


def _scores_to_long_frame(log, dataset, style):
    """Flatten per-iteration score lists into one row per individual score."""
    rows = [
        (iteration, score)
        for iteration, scores in enumerate(log, start=1)
        for score in scores
    ]
    frame = pd.DataFrame(rows, columns=["Iteration", "Score"])
    frame["Dataset"] = dataset
    frame["Style"] = style
    return frame


def plot_data_with_distribution_seaborn(log1, log2=None,
                                        save_path=None,
                                        label1=None,
                                        label2=None,
                                        title=None):
    """
    Plots one or two datasets with the average values and distributions over iterations using Seaborn.

    Each iteration contributes several scores; the line shows their mean and the
    band shows one standard deviation around it.

    Parameters:
        log1 (list of lists): The first list of scores (each element is a list of scores for an iteration).
        log2 (list of lists, optional): The second list of scores (each element is a list of scores for an iteration). Defaults to None.
        save_path (str): Path to save the plot. Defaults to None (the plot is not saved).
        label1 (str): Label for the first dataset, also used as the y-axis label.
            Defaults to "Fraction of Valid Peptide SMILES" when None.
        label2 (str, optional): Label for the second dataset. Defaults to "Log 2" when
            log2 is given without a label.
        title (str, optional): Title of the plot. Defaults to None (no title is set).

    Returns:
        None. The figure is optionally written to save_path and then shown.
    """
    import inspect

    # A None label would become a None column value / legend entry, so fall
    # back to the documented default instead.
    if label1 is None:
        label1 = "Fraction of Valid Peptide SMILES"

    data = _scores_to_long_frame(log1, label1, "Log1")
    palette = {label1: "#8181ED"}  # Default color for log1

    if log2 is not None:
        if label2 is None:
            label2 = "Log 2"
        # Both datasets share the "Score" column; storing log2 under its own
        # column left it all-NaN in the plotted column, so it was never drawn.
        data = pd.concat(
            [data, _scores_to_long_frame(log2, label2, "Log2")],
            ignore_index=True,
        )
        palette[label2] = "#D577FF"  # Default color for log2

    # seaborn >= 0.12 spells the spread option `errorbar`; older releases only
    # know the (now deprecated) `ci` keyword.
    if "errorbar" in inspect.signature(sns.lineplot).parameters:
        spread_kwargs = {"errorbar": "sd"}
    else:
        spread_kwargs = {"ci": "sd"}

    # Set Seaborn theme
    sns.set_theme()
    sns.set_context("paper")

    # Create the plot
    sns.relplot(
        data=data,
        kind="line",
        x="Iteration",
        y="Score",
        hue="Dataset",
        style="Style",
        markers=True,
        dashes=True,
        height=5,
        aspect=1.5,
        palette=palette,
        **spread_kwargs,  # Show standard deviation
    )

    # Titles and labels
    if title is not None:
        plt.title(title)
    plt.xlabel("Iteration")
    plt.ylabel(label1)

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Plot saved to {save_path}")
    plt.show()
    
def plot_data(log1, log2=None,
                    save_path=None,
                    label1="Log 1",
                    label2=None,
                    title="Fraction of Valid Peptides Over Iterations",
                    palette=None):
    """
    Plots one or two datasets with their mean values over iterations.

    Parameters:
        log1 (list): The first list of mean values for each iteration.
        log2 (list, optional): The second list of mean values for each iteration. Defaults to None.
        save_path (str): Path to save the plot. Defaults to None (the plot is not saved).
        label1 (str): Label for the first dataset. Defaults to "Log 1".
        label2 (str, optional): Label for the second dataset. Defaults to "Log 2" when
            log2 is given without a label.
        title (str): Title of the plot. Defaults to "Fraction of Valid Peptides Over Iterations".
        palette (dict, optional): A dictionary mapping dataset labels to colors. Labels it
            does not mention keep their default color. Defaults to None (default colors).

    Returns:
        None. The figure is optionally written to save_path and then shown.
    """
    value_column = "Fraction of Valid Peptides"

    # Prepare data for log1
    data = pd.DataFrame({
        "Iteration": range(1, len(log1) + 1),
        value_column: log1,
        "Dataset": label1
    })
    default_colors = {label1: "#8181ED"}  # Default color for log1

    # Prepare data for log2 if provided
    if log2 is not None:
        # A None label would give the second dataset a null hue/style level.
        if label2 is None:
            label2 = "Log 2"
        second = pd.DataFrame({
            "Iteration": range(1, len(log2) + 1),
            value_column: log2,
            "Dataset": label2
        })
        data = pd.concat([data, second], ignore_index=True)
        default_colors[label2] = "#D577FF"  # Default color for log2

    # Honor the caller's palette instead of overwriting it; a dict is merged
    # over the defaults so a partial mapping still covers every dataset.
    if palette is None:
        palette = default_colors
    elif isinstance(palette, dict):
        palette = {**default_colors, **palette}

    # Set Seaborn theme
    sns.set_theme()
    sns.set_context("paper")

    # Create the plot
    sns.lineplot(
        data=data,
        x="Iteration",
        y=value_column,
        hue="Dataset",
        style="Dataset",
        markers=True,
        dashes=False,
        palette=palette
    )

    # Titles and labels
    plt.title(title)
    plt.xlabel("Iteration")
    plt.ylabel(value_column)

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Plot saved to {save_path}")
    plt.show()