Skip to content

The Bias Visualizer

Module for visualizing results from the bias evaluation.

Source code in genda_lens/genda_lens.py
class Visualizer:
    """Module for visualizing results from the bias evaluation."""

    def __init__(self):
        """Apply the global seaborn style and context used for the plots."""
        import seaborn as sns

        sns.set_style("whitegrid", rc={"lines.linewidth": 1})
        sns.set_context("notebook", font_scale=1.2)

    def visualize_results(self, data, framework, model_name, task=None):
        """Visualize output from any of the gender bias tests that can be run in this package.

        Args:
            data (df): detailed output from any of the tests.
            framework (str): choose between "ner", "dawinobias" or "abc".
            model_name (str): model name, shown in the plot title.
            task (str, optional): choose between "lm" or "coref" if the output is either from the dawinobias or abc framework. Ignored for "ner".
        Returns:
            plot (module): the `matplotlib.pyplot` module, after the seaborn point plot has been drawn.
        Raises:
            ValueError: if `framework` is not a supported value, or if `task` is not "lm" or "coref" for the "abc" and "dawinobias" frameworks.

        *EXAMPLE*

           ```python
           from genda_lens import Visualizer

           # initiate visualizer
           viz = Visualizer()

           # visualize ner results
           plot = viz.visualize_results(data=detailed_output_ner, framework="ner", model_name="my-model-name")

           # visualize abc lm results
           plot = viz.visualize_results(data=detailed_output_lm, framework="abc", model_name="my-model-name", task="lm")

           # visualize abc coref results
           plot = viz.visualize_results(data=detailed_output_coref, framework="abc", model_name="my-model-name", task="coref")
           ```
        """
        # Validate before the plotting imports so that a bad argument fails
        # fast with a clear message instead of an UnboundLocalError later on.
        if framework not in ("abc", "dawinobias", "ner"):
            raise ValueError(
                f'Unknown framework {framework!r}; choose between "ner", "dawinobias" or "abc".'
            )
        if framework != "ner" and task not in ("lm", "coref"):
            raise ValueError(
                f'Framework {framework!r} requires task to be "lm" or "coref", got {task!r}.'
            )

        import matplotlib.pyplot as plt
        import pandas as pd
        import seaborn as sns

        # Global style change; note this overrides the line width set in __init__.
        sns.set_style("whitegrid", rc={"lines.linewidth": 10})

        def leading_float(cell):
            # Parse the text before the first space of a string cell as a float.
            return float(cell.split(" ")[0])

        # (row, column) positions shared by the abc/lm and dawinobias layouts.
        six_cells = [(4, 1), (4, 2), (2, 1), (4, 3), (4, 4), (2, 3)]

        # Each branch supports two table layouts: the first is tried and any
        # ordinary failure falls back to the second. `except Exception` keeps
        # that best-effort behaviour without swallowing KeyboardInterrupt or
        # SystemExit the way a bare `except:` would.
        # NOTE(review): presumably the layouts are "fresh in-memory output"
        # versus "output reloaded from file" - confirm against the test runners.
        df = pd.DataFrame()

        if framework == "abc":
            df["Stereotypical Occupations"] = [
                "Female occupations",
                "Male occupations",
                "Main Effect",
            ] * 2
            df["Anti-reflexive Pronoun"] = ["Female"] * 3 + ["Male"] * 3
            markers = ["o", "o", "o"]
            x = "Anti-reflexive Pronoun"
            nuance = "Stereotypical Occupations"

            if task == "coref":
                try:
                    means = list(data.loc["Mean Rate of Detected Clusters"])
                    rates = list(data.loc["Rate of Detected Clusters"])
                    mean_fem, mean_male = means[1], means[3]
                    fpr_fem_pron_fem_occ = rates[0]
                    fpr_fem_pron_male_occ = rates[1]
                    fpr_male_pron_fem_occ = rates[2]
                    fpr_male_pron_male_occ = rates[3]
                except Exception:
                    mean_fem = float(data.iloc[2, 2])
                    mean_male = float(data.iloc[2, 4])
                    fpr_fem_pron_fem_occ = float(data.iloc[4, 1])
                    fpr_fem_pron_male_occ = float(data.iloc[4, 2])
                    fpr_male_pron_fem_occ = float(data.iloc[4, 3])
                    fpr_male_pron_male_occ = float(data.iloc[4, 4])

                # Order matches the row labels assigned to `df` above:
                # female pronoun (fem occ, male occ, main), then male pronoun.
                points = [
                    fpr_fem_pron_fem_occ,
                    fpr_fem_pron_male_occ,
                    mean_fem,
                    fpr_male_pron_fem_occ,
                    fpr_male_pron_male_occ,
                    mean_male,
                ]
                y = "False Positive Rates"
                title = f"ABC Coref Task: {model_name}"
            else:  # task == "lm"
                try:
                    points = [leading_float(data.iloc[r, c]) for r, c in six_cells]
                except Exception:
                    # Same cells, shifted one column to the left.
                    points = [
                        leading_float(data.iloc[r, c - 1]) for r, c in six_cells
                    ]
                y = "Median Perplexity"
                title = f"ABC LM Task: {model_name}"

            df[y] = points

        elif framework == "dawinobias":
            df["Pronoun"] = ["Female (F1)", "Male (F1)", "Main Effect (Accuracy)"] * 2
            # Both tasks read the table identically; only the title differs.
            try:
                points = [float(data.iloc[r, c]) for r, c in six_cells]
            except Exception:
                f1_row = data.loc["F1"]
                accuracy_row = data.loc["Accuracy"]
                # Explicit positional access: plain `row[0]` on a
                # label-indexed Series is deprecated in recent pandas.
                points = [
                    f1_row.iloc[0],
                    f1_row.iloc[1],
                    accuracy_row.iloc[0],
                    f1_row.iloc[2],
                    f1_row.iloc[3],
                    accuracy_row.iloc[2],
                ]
            if task == "coref":
                title = f"DaWinoBias, Coreference Task: {model_name}"
            else:  # task == "lm"
                title = f"DaWinoBias, LM Task: {model_name}"

            df["Performance"] = points
            df["Condition"] = ["Anti-stereotypical"] * 3 + ["Pro-stereotypical"] * 3
            x = "Condition"
            y = "Performance"
            nuance = "Pronoun"
            markers = ["o", "o", "o"]

        else:  # framework == "ner"
            df["Protected Group"] = ["Majority (F1)", "Minority (F1)"] * 2
            try:
                # NOTE(review): this layout alternates rows 2 and 4 while the
                # fallback reads row 4 only - confirm this is intended.
                points = [
                    leading_float(data.iloc[2, 1]),
                    leading_float(data.iloc[4, 2]),
                    leading_float(data.iloc[2, 3]),
                    leading_float(data.iloc[4, 4]),
                ]
            except Exception:
                points = [leading_float(data.iloc[4, col]) for col in range(4)]

            df["Performance"] = points
            df["Augmentation"] = ["Female Names"] * 2 + ["Male Names"] * 2
            x = "Augmentation"
            y = "Performance"
            nuance = "Protected Group"
            markers = ["o", "o"]
            title = f"NER Task, Augmented DaNe: {model_name}"

        sns.pointplot(
            data=df,
            x=x,
            y=y,
            hue=nuance,
            dodge=True,
            join=True,
            markers=markers,
            scale=1.2,
            linestyles=[":", ":", "-"],
            palette=["sandybrown", "mediumpurple", "darkgrey"],
        ).set_title(title)

        plt.minorticks_on()

        return plt

visualize_results(data, framework, model_name, task=None)

Visualize output from any of the gender bias tests that can be run in this package.

Parameters:

Name Type Description Default
data df

detailed output from any of the tests.

required
framework str

choose between "ner", "dawinobias" or "abc".

required
model_name str

model name

required
task str

choose between "lm" or "coref" if the output is either from the dawinobias or abc framework.

None

Returns:

Name Type Description
plot plot

seaborn plot visualization.

EXAMPLE

from genda_lens import Visualizer

# initiate visualizer
viz = Visualizer()

# visualize ner results
plot = viz.visualize_results(data = detailed_output_ner, framework = "ner", model_name = "my-model-name")

# visualize abc lm results
plot = viz.visualize_results(data = detailed_output_lm, framework = "abc", model_name = "my-model-name", task="lm")

# visualize abc coref results
plot = viz.visualize_results(data = detailed_output_coref, framework = "abc", model_name = "my-model-name", task="coref")
Source code in genda_lens/genda_lens.py
def visualize_results(self, data, framework, model_name, task=None):
    """Visualize output from any of the gender bias tests that can be run in this package.

    Args:
        data (df): detailed output from any of the tests.
        framework (str): choose between "ner", "dawinobias" or "abc".
        model_name (str): model name, shown in the plot title.
        task (str, optional): choose between "lm" or "coref" if the output is either from the dawinobias or abc framework. Ignored for "ner".
    Returns:
        plot (module): the `matplotlib.pyplot` module, after the seaborn point plot has been drawn.
    Raises:
        ValueError: if `framework` is not a supported value, or if `task` is not "lm" or "coref" for the "abc" and "dawinobias" frameworks.

    *EXAMPLE*

       ```python
       from genda_lens import Visualizer

       # initiate visualizer
       viz = Visualizer()

       # visualize ner results
       plot = viz.visualize_results(data=detailed_output_ner, framework="ner", model_name="my-model-name")

       # visualize abc lm results
       plot = viz.visualize_results(data=detailed_output_lm, framework="abc", model_name="my-model-name", task="lm")

       # visualize abc coref results
       plot = viz.visualize_results(data=detailed_output_coref, framework="abc", model_name="my-model-name", task="coref")
       ```
    """
    # Validate before the plotting imports so that a bad argument fails fast
    # with a clear message instead of an UnboundLocalError later on.
    if framework not in ("abc", "dawinobias", "ner"):
        raise ValueError(
            f'Unknown framework {framework!r}; choose between "ner", "dawinobias" or "abc".'
        )
    if framework != "ner" and task not in ("lm", "coref"):
        raise ValueError(
            f'Framework {framework!r} requires task to be "lm" or "coref", got {task!r}.'
        )

    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns

    # Global seaborn style change applied on every call.
    sns.set_style("whitegrid", rc={"lines.linewidth": 10})

    def leading_float(cell):
        # Parse the text before the first space of a string cell as a float.
        return float(cell.split(" ")[0])

    # (row, column) positions shared by the abc/lm and dawinobias layouts.
    six_cells = [(4, 1), (4, 2), (2, 1), (4, 3), (4, 4), (2, 3)]

    # Each branch supports two table layouts: the first is tried and any
    # ordinary failure falls back to the second. `except Exception` keeps that
    # best-effort behaviour without swallowing KeyboardInterrupt or SystemExit
    # the way a bare `except:` would.
    # NOTE(review): presumably the layouts are "fresh in-memory output" versus
    # "output reloaded from file" - confirm against the test runners.
    df = pd.DataFrame()

    if framework == "abc":
        df["Stereotypical Occupations"] = [
            "Female occupations",
            "Male occupations",
            "Main Effect",
        ] * 2
        df["Anti-reflexive Pronoun"] = ["Female"] * 3 + ["Male"] * 3
        markers = ["o", "o", "o"]
        x = "Anti-reflexive Pronoun"
        nuance = "Stereotypical Occupations"

        if task == "coref":
            try:
                means = list(data.loc["Mean Rate of Detected Clusters"])
                rates = list(data.loc["Rate of Detected Clusters"])
                mean_fem, mean_male = means[1], means[3]
                fpr_fem_pron_fem_occ = rates[0]
                fpr_fem_pron_male_occ = rates[1]
                fpr_male_pron_fem_occ = rates[2]
                fpr_male_pron_male_occ = rates[3]
            except Exception:
                mean_fem = float(data.iloc[2, 2])
                mean_male = float(data.iloc[2, 4])
                fpr_fem_pron_fem_occ = float(data.iloc[4, 1])
                fpr_fem_pron_male_occ = float(data.iloc[4, 2])
                fpr_male_pron_fem_occ = float(data.iloc[4, 3])
                fpr_male_pron_male_occ = float(data.iloc[4, 4])

            # Order matches the row labels assigned to `df` above:
            # female pronoun (fem occ, male occ, main), then male pronoun.
            points = [
                fpr_fem_pron_fem_occ,
                fpr_fem_pron_male_occ,
                mean_fem,
                fpr_male_pron_fem_occ,
                fpr_male_pron_male_occ,
                mean_male,
            ]
            y = "False Positive Rates"
            title = f"ABC Coref Task: {model_name}"
        else:  # task == "lm"
            try:
                points = [leading_float(data.iloc[r, c]) for r, c in six_cells]
            except Exception:
                # Same cells, shifted one column to the left.
                points = [leading_float(data.iloc[r, c - 1]) for r, c in six_cells]
            y = "Median Perplexity"
            title = f"ABC LM Task: {model_name}"

        df[y] = points

    elif framework == "dawinobias":
        df["Pronoun"] = ["Female (F1)", "Male (F1)", "Main Effect (Accuracy)"] * 2
        # Both tasks read the table identically; only the title differs.
        try:
            points = [float(data.iloc[r, c]) for r, c in six_cells]
        except Exception:
            f1_row = data.loc["F1"]
            accuracy_row = data.loc["Accuracy"]
            # Explicit positional access: plain `row[0]` on a label-indexed
            # Series is deprecated in recent pandas.
            points = [
                f1_row.iloc[0],
                f1_row.iloc[1],
                accuracy_row.iloc[0],
                f1_row.iloc[2],
                f1_row.iloc[3],
                accuracy_row.iloc[2],
            ]
        if task == "coref":
            title = f"DaWinoBias, Coreference Task: {model_name}"
        else:  # task == "lm"
            title = f"DaWinoBias, LM Task: {model_name}"

        df["Performance"] = points
        df["Condition"] = ["Anti-stereotypical"] * 3 + ["Pro-stereotypical"] * 3
        x = "Condition"
        y = "Performance"
        nuance = "Pronoun"
        markers = ["o", "o", "o"]

    else:  # framework == "ner"
        df["Protected Group"] = ["Majority (F1)", "Minority (F1)"] * 2
        try:
            # NOTE(review): this layout alternates rows 2 and 4 while the
            # fallback reads row 4 only - confirm this is intended.
            points = [
                leading_float(data.iloc[2, 1]),
                leading_float(data.iloc[4, 2]),
                leading_float(data.iloc[2, 3]),
                leading_float(data.iloc[4, 4]),
            ]
        except Exception:
            points = [leading_float(data.iloc[4, col]) for col in range(4)]

        df["Performance"] = points
        df["Augmentation"] = ["Female Names"] * 2 + ["Male Names"] * 2
        x = "Augmentation"
        y = "Performance"
        nuance = "Protected Group"
        markers = ["o", "o"]
        title = f"NER Task, Augmented DaNe: {model_name}"

    sns.pointplot(
        data=df,
        x=x,
        y=y,
        hue=nuance,
        dodge=True,
        join=True,
        markers=markers,
        scale=1.2,
        linestyles=[":", ":", "-"],
        palette=["sandybrown", "mediumpurple", "darkgrey"],
    ).set_title(title)

    plt.minorticks_on()

    return plt