Source code for plom.create.buildClasslist

# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (C) 2018-2022 Andrew Rechnitzer
# Copyright (C) 2019-2025 Colin B. Macdonald
# Copyright (C) 2020 Vala Vakilian
# Copyright (C) 2020 Dryden Wiebe

import csv
import tempfile
from importlib import resources
from pathlib import Path

# try to avoid importing Pandas unless we use specific functions: Issue #2154
# import pandas
import plom
from plom.create.classlistValidator import (
    PlomClasslistValidator,
    fullname_field,
    papernumber_field,
    sid_field,
)
from plom.finish.return_tools import import_canvas_csv

# Note: file is full of pandas warnings, which I think are false positives
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation


def clean_non_canvas_csv(csv_file_name, minimalist=True):
    """Read a user-formatted (non-Canvas) classlist csv and tidy its columns.

    1. Find the student-id column, rename it to `id`, strip whitespace.
    2. Find the student-name column, rename it to `name`, strip whitespace.
    3. Find, or create, the `paper_number` column.

    Column headers are stripped of surrounding whitespace and then
    matched case-insensitively.

    You may want to check first with `check_is_non_canvas_csv`.

    Args:
        csv_file_name (pathlib.Path/str): the csv file.

    Keyword Args:
        minimalist: discard unnecessary columns.

    Returns:
        pandas.DataFrame: data with columns `id`, `name` and
        `paper_number`.  With ``minimalist=True`` kwarg specified, this
        is all you get, otherwise the original columns will be included
        too, except those renamed to create the required columns.

    Raises:
        ValueError: the file has no id column or no name column.
    """
    import pandas

    # everything is read as "object" so ids keep leading zeros, etc.
    df = pandas.read_csv(csv_file_name, dtype="object")
    print(f'Extracting columns from csv file: "{csv_file_name}"')

    # strip excess whitespace from column names
    df.rename(columns=lambda x: str(x).strip(), inplace=True)

    # find the id column and clean it up.
    id_column = next(
        (c for c in df.columns if c.casefold() == sid_field.casefold()), None
    )
    if id_column is None:
        raise ValueError('no "id" column is present')
    if id_column != "id":
        print(f'Renaming column "{id_column}" to "id"')
    df.rename(columns={id_column: "id"}, inplace=True)
    # clean up the column - strip whitespace
    df["id"] = df["id"].apply(lambda x: str(x).strip())  # avoid issues with non-string

    # find the name column and clean it up.
    fullname_column = next(
        (c for c in df.columns if c.casefold() == fullname_field.casefold()), None
    )
    if fullname_column is None:
        raise ValueError('no "name" column is present')
    if fullname_column != "name":
        print(f'Renaming column "{fullname_column}" to "name"')
    df.rename(columns={fullname_column: "name"}, inplace=True)
    # clean up the column - strip whitespace.  `Series.apply` returns a new
    # Series, so the result must be assigned back or nothing is stripped.
    # Only genuine strings are touched so blank (missing) entries stay as
    # they were rather than raising or turning into the text "nan".
    df["name"] = df["name"].apply(lambda x: x.strip() if isinstance(x, str) else x)

    # adds a "paper_number" column if the file did not have one
    find_paper_number_column(df)

    # everything clean - now either return just the necessary columns or all cols.
    if minimalist:
        return df[["id", "name", "paper_number"]]
    return df


def find_paper_number_column(df, *, make=True):
    """Locate the paper-number column of a classlist, or create a blank one.

    The column header is matched case-insensitively against
    `papernumber_field`.  Whatever column is used ends up being called
    `paper_number`, with missing entries replaced by -1 and all other
    entries converted with `int`.

    Args:
        df: a Pandas dataframe.

    Keyword Args:
        make (bool): when no matching column exists, add a placeholder
            `paper_number` column instead of failing.  True by default.

    Returns:
        None: the input `df` is modified in place.

    Raises:
        ValueError: no matching column and `make` is false.
    """
    import pandas

    def _to_paper_number(entry):
        # blank cells become the sentinel -1, anything else must be an integer
        if pandas.isna(entry):
            return -1
        return int(entry)

    canonical = "paper_number"

    # first header that matches, ignoring case; None when nothing matches
    chosen = next(
        (
            header
            for header in df.columns
            if header.casefold() == papernumber_field.casefold()
        ),
        None,
    )

    if not chosen:
        if not make:
            raise ValueError('no "paper_number" column is present.')
        chosen = canonical
        df[[chosen]] = None

    # normalise the entries before (possibly) renaming the column
    df[chosen] = df[chosen].apply(_to_paper_number)

    if chosen != canonical:
        print(f'Renaming column "{chosen}" to "paper_number"')
    df.rename(columns={chosen: canonical}, inplace=True)


def clean_canvas_csv(csv_file_name):
    """Load a Canvas-exported csv and reduce it to the classlist columns.

    The file is read with `import_canvas_csv`, a `paper_number` column
    is found or created, and then only the student number, student name
    and paper number are kept.

    Consider confirming that the file really is a Canvas export with
    `check_is_canvas_csv` before calling this.

    Arguments:
        csv_file_name (pathlib.Path/str): the csv file.

    Returns:
        pandas.DataFrame: data with columns `id`, `name` and
        `paper_number`.
    """
    # Canvas header holding the student number ("SIS User ID" is a
    # candidate alternative that is currently not used).
    student_number_header = "Student Number"

    canvas_df = import_canvas_csv(csv_file_name)
    find_paper_number_column(canvas_df)

    wanted_headers = [student_number_header, "Student", "paper_number"]
    classlist_df = canvas_df[wanted_headers]
    # relabel the Canvas headers with the names used elsewhere for classlists
    classlist_df.columns = ["id", "name", "paper_number"]
    return classlist_df


def process_classlist_backend(student_csv_file_name):
    """Turn a classlist csv, Canvas-style or user-formatted, into a dataframe.

    The header row is printed for the user's information.  Then:

    1. If the validator recognises the file as a Canvas export, the
       relevant columns are extracted with `clean_canvas_csv`.
    2. Otherwise, if the validator accepts it as a non-Canvas classlist,
       it is cleaned with `clean_non_canvas_csv`.
    3. Otherwise the file is rejected.

    Arguments:
        student_csv_file_name (pathlib.Path/str): class info csv file.

    Returns:
        pandas.DataFrame: the processed classlist data.

    Raises:
        ValueError: the file passes neither the Canvas nor the
            non-Canvas check.
    """
    # Peek at the header row only; the real parsing happens further down.
    with open(student_csv_file_name) as fh:
        headers = csv.DictReader(fh, skipinitialspace=True).fieldnames
    print(f"Class list headers = {headers}")

    # Whether or not this is a Canvas file, it must carry the minimum
    # information (student name and id); the validator decides which of
    # the two formats we are looking at, Canvas being tried first.
    validator = PlomClasslistValidator()

    if validator.check_is_canvas_csv(student_csv_file_name):
        print("This file looks like it was exported from Canvas")
        classlist_df = clean_canvas_csv(student_csv_file_name)
        print("We have successfully extracted columns from Canvas data and renaming")
        return classlist_df

    if not validator.check_is_non_canvas_csv(student_csv_file_name):
        raise ValueError("Problems with the supplied classlist. See output above.")

    print(
        "This file looks like it was not exported from Canvas; checking for the required information..."
    )
    classlist_df = clean_non_canvas_csv(student_csv_file_name)
    print(
        "We have successfully extracted and renamed columns from the non Canvas data."
    )
    return classlist_df


def get_demo_classlist(spec):
    """Get the demo classlist.

    The `demoClassList.csv` resource bundled with the `plom` package is
    copied to a temporary file and run through `process_classlist_file`
    (warnings ignored).  The first half of the papers are then pre-named.

    Args:
        spec (dict): the test spec; `spec["numberToProduce"]` is read
            here and the whole spec is passed on for validation.

    Returns:
        list: a list of dicts, one per student, as produced by
        `process_classlist_file`, truncated according to
        `spec["numberToProduce"]`.

    Raises:
        ValueError: processing the demo classlist failed.
    """
    # Direct approach: but maybe I like exercising code-paths with below...
    #   with (resources.files(plom) / "demoClassList.csv").open("r") as f:
    #       df = clean_non_canvas_csv(f)
    #       classlist = df.to_dict("records")
    b = (resources.files(plom) / "demoClassList.csv").read_bytes()

    # The original code notes (Issue #1996) that a context manager around
    # the whole use of the temp file is not appropriate.  So the `with`
    # covers only the write: the handle is closed before the path is
    # reopened for reading, and `delete=False` keeps the file on disc
    # until we remove it ourselves.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as fh:
        fh.write(b)
    f = Path(fh.name)

    try:
        success, clist = process_classlist_file(f, spec, ignore_warnings=True)
    finally:
        # always clean up, including when processing fails or raises
        f.unlink()

    if success is False:
        raise ValueError(
            f"Something has gone seriously wrong with the demo classlist - {clist}."
        )

    # The raw demo classlist does not have any pre-named students.
    # So here we pre-name half of spec[numberToProduce] papers
    for n in range(spec["numberToProduce"] // 2):
        clist[n]["paper_number"] = n + 1

    # now only return the classlist truncated to numberToProduce lines
    # NOTE(review): the slice keeps numberToProduce + 1 entries, one more
    # than the comment above says - confirm which is intended.
    return clist[: (spec["numberToProduce"] + 1)]


def process_classlist_file(student_csv_file_name, spec, *, ignore_warnings=False):
    """Get student names/IDs from a csv file.

    Student numbers come from an `id` column.  Student names must be
    in a *single* 'name' column.  There is some flexibility in those
    titles, see

      - :func:`plom.create.possible_sid_fields`
      - :func:`plom.create.possible_fullname_fields`

    Alternatively, give a .csv exported from Canvas (experimental!)
    Files recognised as Canvas exports skip the validation step.

    Arguments:
        student_csv_file_name (pathlib.Path/str): class info csv file.
        spec (dict): validated test spec.

    Keyword Arguments:
        ignore_warnings (bool): if true, proceed with classlist
            processing even if there are warnings.  Default False.

    Returns:
        tuple: if successful then "(True, clist)" where clist is a list
        of dicts, one per row of the processed classlist, each with
        "id", "name" and "paper_number".  On failure "(False, warn_err)"
        where "warn_err" is a list of dicts of warnings and errors.
        Each dict contains "warn_or_err" which is 'warning' or 'error',
        "werr_line" being the line number at which the error occurs, and
        'werr_text' being a string describing the warning/error.

    Raises:
        FileNotFoundError: the csv file does not exist.
    """
    student_csv_file_name = Path(student_csv_file_name)
    if not student_csv_file_name.exists():
        raise FileNotFoundError(f'Cannot find file "{student_csv_file_name}"')

    vlad = PlomClasslistValidator()

    if not vlad.check_is_canvas_csv(student_csv_file_name):
        success, warn_err = vlad.validate_csv(student_csv_file_name, spec=spec)

        if success is False:
            # validation failed, return warning, error list
            PlomClasslistValidator.print_classlist_warnings_errors(warn_err)
            return (False, warn_err)

        # validation passed but there are warnings
        if warn_err:
            PlomClasslistValidator.print_classlist_warnings_errors(warn_err)
            if not ignore_warnings:
                return (False, warn_err)
            print("Continuing despite warnings")

    df = process_classlist_backend(student_csv_file_name)
    # "records" makes it output a list-of-dicts, one per row
    return (True, df.to_dict("records"))