Module epiclass.core.confusion_matrix

ConfusionMatrixWriter class

Classes

class ConfusionMatrixWriter (labels: List[str], confusion_matrix: np.ndarray)

Class to create/handle confusion matrices.

labels : list of classes string representation confusion_matrix : A confusion matrix that counts each final prediction (int matrix) Expects a confusion matrix input with prediction rows (row value: pred1 pred2 pred3 …) and target columns.

Expand source code
class ConfusionMatrixWriter:
    """Class to create/handle confusion matrices.

    labels : list of classes string representation
    confusion_matrix : A confusion matrix that counts each final prediction (int matrix)
    Expects a confusion matrix input with prediction rows (row value: pred1 pred2 pred3 ...) and target columns.
    """

    def __init__(self, labels: List[str], confusion_matrix: np.ndarray):
        self._labels = sorted(labels)
        self._og_confusion_mat = np.array(confusion_matrix)
        self._pd_matrix, self._pd_rel_matrix = self.init_confusion_matrices(
            confusion_matrix
        )  # pd dataframe

    def __add__(self, other: ConfusionMatrixWriter) -> ConfusionMatrixWriter:
        if set(self._labels) != set(other._labels):
            raise ValueError("Cannot add matrices with different labels.")

        new_mat = self._og_confusion_mat + other._og_confusion_mat  # type: ignore
        new_mat = ConfusionMatrixWriter(self._labels, new_mat)
        return new_mat

    @staticmethod
    def _extract_class(label: str):
        """Extract class for a label with count, e.g. input(42)."""
        return str(label).split("(", 1)[0]

    @classmethod
    def from_csv(cls, csv_path, relative: bool):
        """Create instance from already written file.
        The state of the matrix (relative or not) needs to be specified.
        """
        obj = cls.__new__(cls)  # Does not call __init__
        content = pd.DataFrame(pd.read_csv(csv_path, sep=",", index_col=0))
        labels_w_count = content.index.tolist()
        values = content.values
        if relative:
            if np.any(a=values >= 2, axis=None):
                raise InputMatrixError(
                    "Inputed file seems to contain count values, but relative=True was given."
                )

            obj._pd_rel_matrix = content
            obj._labels = sorted(
                [ConfusionMatrixWriter._extract_class(val) for val in labels_w_count]
            )

            labels_count = re.findall(r"\(([0-9]+)\)", "".join(labels_w_count))
            labels_count = np.array(labels_count, dtype=int)

            mat = np.array((content.values.T * labels_count).T, dtype=float)
            obj._og_confusion_mat = np.around(mat).astype(int)

            obj._pd_matrix = pd.DataFrame(
                data=obj._og_confusion_mat, index=content.index, columns=content.columns
            )  # pylint: disable=no-member
        else:
            if np.any(a=(values <= 0.99) & (values >= 0.01), axis=None):
                raise InputMatrixError(
                    "Inputed file seems to contain relative values, but relative=False was given."
                )

            obj._pd_matrix = content
            obj._labels = sorted(
                [ConfusionMatrixWriter._extract_class(val) for val in labels_w_count]
            )  # pylint: disable=no-member

            obj._og_confusion_mat = content.to_numpy()  # pylint: disable=no-member

            rel_mat = ConfusionMatrixWriter.to_relative_confusion_matrix(
                labels_count=obj._og_confusion_mat.sum(axis=1),
                confusion_matrix=obj._og_confusion_mat,
            )
            obj._pd_rel_matrix = pd.DataFrame(
                data=rel_mat, index=content.index, columns=content.columns
            )  # pylint: disable=no-member
        return obj

    def init_confusion_matrices(self, confusion_matrix: np.ndarray):
        """Returns confusion matrices with labels (pandas df) from int matrix.
        Expects prediction rows (target pred1 pred2 pred3 ....) and target columns.
        Returns original and normalized on rows matrices.
        """
        labels_count = confusion_matrix.sum(axis=1)  # total nb examples of each label
        labels_w_count = [
            f"{label}({label_count})"
            for label, label_count in zip(self._labels, labels_count)
        ]

        count_matrix = pd.DataFrame(
            data=confusion_matrix, index=labels_w_count, columns=self._labels, dtype=int
        )

        rel_confusion_mat = ConfusionMatrixWriter.to_relative_confusion_matrix(
            labels_count, confusion_matrix
        )
        rel_matrix = pd.DataFrame(
            data=rel_confusion_mat, index=labels_w_count, columns=self._labels
        )

        return count_matrix, rel_matrix

    @staticmethod
    def to_relative_confusion_matrix(
        labels_count: np.ndarray, confusion_matrix: np.ndarray
    ):
        """Normalize confusion matrix per row.
        Expects prediction rows (target pred1 pred2 pred3 ....) and target columns.
        """
        confusion_mat1 = np.divide(confusion_matrix.T, labels_count)
        confusion_mat2 = np.nan_to_num(confusion_mat1)
        return confusion_mat2.T

    def to_png(self, path: str | Path) -> None:
        """Write to path an image of the confusion matrix.
        Colors : https://i.stack.imgur.com/cmk1J.png
        Ref code : https://matplotlib.org/3.1.0/tutorials/colors/colormap-manipulation.html
        https://stackoverflow.com/questions/35710931/remove-a-section-of-a-colormap
        """
        fig, ax = plt.subplots()  # Initialize a new figure and axes

        vmax = 0.9999  # this is so exactly 1.0 is a different color from the rest
        vmin = 0.0

        # mask empty values, so they are white in the image
        data_mask = np.ma.masked_where(self._pd_rel_matrix == 0, self._pd_rel_matrix)

        # prep colormap
        nb_colors = 20
        gnuplot = cm.get_cmap("gnuplot", nb_colors)  # 20 colors
        newcolors = gnuplot(np.linspace(0.0, 1.0, nb_colors))
        new_cmap = ListedColormap(newcolors)  # type: ignore
        new_cmap.set_over(
            matplotlib.colors.to_rgba("GreenYellow")  # type: ignore
        )  # color for max values, do it LAST

        # create color mesh and arrange ticks
        mesh = ax.pcolormesh(
            data_mask, cmap=new_cmap, vmin=vmin, vmax=vmax, edgecolors="k"
        )

        nb_labels = self._pd_rel_matrix.columns.shape[0]
        ax.set_frame_on(False)
        ax.set_xticks(np.arange(nb_labels), minor=False)
        ax.set_yticks(np.arange(nb_labels), minor=False)
        ax.invert_yaxis()
        ax.xaxis.tick_top()

        ax.set_xticklabels(self._pd_rel_matrix.columns, rotation=70, ha="left")
        ax.set_yticklabels(self._pd_rel_matrix.index, va="top")

        ax = plt.gca()
        for t in ax.xaxis.get_major_ticks():
            t.tick1On = False
            t.tick2On = False
        for t in ax.yaxis.get_major_ticks():
            t.tick1On = False
            t.tick2On = False

        # annotate each matrix value position
        for i in np.arange(nb_labels):
            for j in np.arange(nb_labels):
                count = self._pd_matrix.iat[i, j]
                if count != 0:
                    text = f"{count}\n{self._pd_rel_matrix.iat[i, j]*100:.1f}%"
                    text_obj = ax.text(
                        x=j + 0.5,
                        y=i + 0.5,
                        s=text,
                        horizontalalignment="center",
                        verticalalignment="center",
                        color="w",
                        size=5,
                    )

                    text_obj.set_path_effects(
                        [
                            path_effects.Stroke(linewidth=1, foreground="black"),
                            path_effects.Normal(),
                        ]  # type: ignore
                    )

        # Color bar
        bounds = np.linspace(
            vmin, math.ceil(vmax), nb_colors + 1
        )  # just to have the max tick appear properly
        ticks = np.linspace(vmin, math.ceil(vmax), 11)
        cbar = fig.colorbar(
            mesh, ax=ax, shrink=0.75, boundaries=bounds, ticks=ticks, extend="max"
        )
        cbar.ax.tick_params(labelsize=7)

        plt.tight_layout()
        fig.savefig(path, format="png", dpi=500)  # Save the figure to a file
        plt.close(fig)  # Close the figure window

    def to_csv(self, path, relative):
        """Write to path a csv file of the confusion matrix.

        The type of matrix (relative by row, or not) needs to be specified.
        """
        if relative:
            self._pd_rel_matrix.to_csv(path, encoding="utf8", float_format="%.4f")
        else:
            self._pd_matrix.to_csv(path, encoding="utf8")

    def to_all_formats(self, logdir: str | Path, name: str) -> Tuple[Path, Path, Path]:
        """Write to logdir files of the confusion matrix.
        out 1 : Path of csv of non-normalized matrix
        out 2 : Path of csv of normalized matrix
        out 3 : Path of png of matrix
        """
        outpath = Path(logdir) / name

        out1 = outpath.with_name(f"{name}.csv")
        out1_rel = outpath.with_name(f"{name}_relative.csv")
        out2 = outpath.with_name(f"{name}.png")

        self.to_csv(out1, relative=False)
        self.to_csv(out1_rel, relative=True)
        self.to_png(str(out2))

        return out1, out1_rel, out2

    @staticmethod
    def convert_matrix_csv_to_png(in_path, out_path, relative):
        """Convert csv of confusion matrix to a png, and write it to out_path.

        The state of the read matrix (relative by row or not) needs to be specified.
        """
        writer = ConfusionMatrixWriter.from_csv(in_path, relative)
        writer.to_png(out_path)

Static methods

def convert_matrix_csv_to_png(in_path, out_path, relative)

Convert csv of confusion matrix to a png, and write it to out_path.

The state of the read matrix (relative by row or not) needs to be specified.

def from_csv(csv_path, relative: bool)

Create instance from already written file. The state of the matrix (relative or not) needs to be specified.

def to_relative_confusion_matrix(labels_count: np.ndarray, confusion_matrix: np.ndarray)

Normalize confusion matrix per row. Expects prediction rows (target pred1 pred2 pred3 ....) and target columns.

Methods

def init_confusion_matrices(self, confusion_matrix: np.ndarray)

Returns confusion matrices with labels (pandas df) from int matrix. Expects prediction rows (target pred1 pred2 pred3 ....) and target columns. Returns original and normalized on rows matrices.

def to_all_formats(self, logdir: str | Path, name: str)

Write to logdir files of the confusion matrix. out 1 : Path of csv of non-normalized matrix out 2 : Path of csv of normalized matrix out 3 : Path of png of matrix

def to_csv(self, path, relative)

Write to path a csv file of the confusion matrix.

The type of matrix (relative by row, or not) needs to be specified.

def to_png(self, path: str | Path)
class InputMatrixError (*args, **kwargs)

Raised when input matrix is not as expected.

Expand source code
class InputMatrixError(Exception):
    """Raised when input matrix is not as expected."""

Ancestors

  • builtins.Exception
  • builtins.BaseException