# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (C) 2018-2022 Andrew Rechnitzer
# Copyright (C) 2019-2025 Colin B. Macdonald
# Copyright (C) 2020 Vala Vakilian
# Copyright (C) 2020 Dryden Wiebe
import csv
import tempfile
from importlib import resources
from pathlib import Path
# try to avoid importing Pandas unless we use specific functions: Issue #2154
# import pandas
import plom
from plom.create.classlistValidator import (
PlomClasslistValidator,
fullname_field,
papernumber_field,
sid_field,
)
from plom.finish.return_tools import import_canvas_csv
# Note: file is full of pandas warnings, which I think are false positives
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation
def clean_non_canvas_csv(csv_file_name, minimalist=True):
"""Read the csv file and clean the csv file.
1. Retrieve the id.
2. Retrieve the name
You may want to check first with `check_is_non_canvas_csv`.
Args:
csv_file_name (pathlib.Path/str): the csv file.
Keyword Args:
minimalist: discard unnecessary columns.
Returns:
pandas.DataFrame: data with columns `id` and `name`
and possibly `papernum` if you had such a column in the input.
With ``minimalist=True`` kwarg specified, this is all you get,
otherwise the original columns will be included too, except
those renamed to create the required columns.
"""
import pandas
df = pandas.read_csv(csv_file_name, dtype="object")
print('Extracting columns from csv file: "{0}"'.format(csv_file_name))
# strip excess whitespace from column names
df.rename(columns=lambda x: str(x).strip(), inplace=True)
# find the id column and clean it up.
id_column = None
for c in df.columns:
if c.casefold() == sid_field:
id_column = c
break
if id_column is None:
raise ValueError('no "id" column is present')
if id_column != "id":
print(f'Renaming column "{id_column}" to "id"')
df.rename(columns={id_column: "id"}, inplace=True)
# clean up the column - strip whitespace
df["id"] = df["id"].apply(lambda X: str(X).strip()) # avoid issues with non-string
# find the name column and clean it up.
fullname_column = None
for c in df.columns:
if c.casefold() == fullname_field.casefold():
fullname_column = c
break
if fullname_column is None:
raise ValueError('no "name" column is present')
if fullname_column != "name":
print(f'Renaming column "{fullname_column}" to "name"')
df.rename(columns={fullname_column: "name"}, inplace=True)
# clean up the column - strip whitespace
df["name"].apply(lambda X: str(X).strip()) # avoid errors with blanks
find_paper_number_column(df)
# everything clean - now either return just the necessary columns or all cols.
if minimalist:
return df[["id", "name", "paper_number"]]
return df
def find_paper_number_column(df, *, make=True):
"""Find or make a paper_number column.
Args:
df: a Pandas dataframe.
Keyword Args:
make (bool): make an placeholder `paper_number` column if one
is not found. True by default.
Returns:
None: modifies the input `df`.
"""
import pandas
# find the paper-number column and clean it up.
papernumber_column = None
for c in df.columns:
if c.casefold() == papernumber_field.casefold():
papernumber_column = c
break
if not papernumber_column:
if not make:
raise ValueError('no "paper_number" column is present.')
papernumber_column = "paper_number"
df[[papernumber_column]] = None
# clean it up.
df[papernumber_column] = df[papernumber_column].apply(
lambda x: -1 if pandas.isna(x) else int(x)
)
if papernumber_column != "paper_number":
print(f'Renaming column "{papernumber_column}" to "paper_number"')
df.rename(columns={papernumber_column: "paper_number"}, inplace=True)
def clean_canvas_csv(csv_file_name):
"""Read the canvas csv file and clean the csv file.
You may want to first check if the file is a Canvas-exported file
using `check_is_canvas_csv`.
Arguments:
csv_file_name (pathlib.Path/str): the csv file.
Returns:
pandas.DataFrame: data with columns `id` and `name`
"""
STUDENT_NUM_COL = "Student Number"
# STUDENT_NUM_COL = "SIS User ID"
df = import_canvas_csv(csv_file_name)
find_paper_number_column(df)
df = df[[STUDENT_NUM_COL, "Student", "paper_number"]]
df.columns = ["id", "name", "paper_number"]
return df
def process_classlist_backend(student_csv_file_name):
"""Process classlist, either from a canvas style csv or user-formatted.
1. Check if the file is a csv exported from Canvas. If so extract
relevant headers and clean-up the file.
2. Otherwise check for suitable ID and name columns.
3. Check for latin character encodability, a restriction to be
loosened in the future.
Arguments:
student_csv_file_name (pathlib.Path/str): class info csv file.
Returns:
pandas.DataFrame: the processed classlist data.
"""
with open(student_csv_file_name) as csvfile:
csv_reader = csv.DictReader(csvfile, skipinitialspace=True)
csv_fields = csv_reader.fieldnames
print("Class list headers = {}".format(csv_fields))
# Depending on the type of file, whether its a Canvas file or not,
# we need to check it has the minimum information ie student name/id.
# If not we will fail the process.
# First we check if this csv file is a Canvas output - using the validator
vlad = PlomClasslistValidator()
if vlad.check_is_canvas_csv(student_csv_file_name):
print("This file looks like it was exported from Canvas")
student_info_df = clean_canvas_csv(student_csv_file_name)
print("We have successfully extracted columns from Canvas data and renaming")
elif vlad.check_is_non_canvas_csv(student_csv_file_name):
print(
"This file looks like it was not exported from Canvas; checking for the required information..."
)
student_info_df = clean_non_canvas_csv(student_csv_file_name)
print(
"We have successfully extracted and renamed columns from the non Canvas data."
)
else:
raise ValueError("Problems with the supplied classlist. See output above.")
return student_info_df
[docs]
def get_demo_classlist(spec):
"""Get the demo classlist."""
# Direct approach: but maybe I like exercising code-paths with below...
# with (resources.files(plom) / "demoClassList.csv").open("r") as f:
# df = clean_non_canvas_csv(f)
# classlist = df.to_dict("records")
b = (resources.files(plom) / "demoClassList.csv").read_bytes()
# Context manager not appropriate here, Issue #1996
f = Path(tempfile.NamedTemporaryFile(delete=False, suffix=".csv").name)
with open(f, "wb") as fh:
fh.write(b)
success, clist = process_classlist_file(f, spec, ignore_warnings=True)
if success is False:
raise ValueError(
f"Something has gone seriously wrong with the demo classlist - {clist}."
)
f.unlink()
# The raw demo classlist does not have any pre-named students.
# So here we pre-name half of spec[numberToProduce] papers
for n in range(spec["numberToProduce"] // 2):
clist[n]["paper_number"] = n + 1
# now only return the classlist truncated to numberToProduce lines
return clist[: (spec["numberToProduce"] + 1)]
[docs]
def process_classlist_file(student_csv_file_name, spec, *, ignore_warnings=False):
"""Get student names/IDs from a csv file.
Student numbers come from an `id` column. Student names
must be in a *single* 'name' column. There is some flexibility
in those titles, see
- :func:`plom.create.possible_sid_fields`
- :func:`plom.create.possible_fullname_fields`
Alternatively, give a .csv exported from Canvas (experimental!)
Arguments:
student_csv_file_name (pathlib.Path/str): class info csv file.
spec (dict): validated test spec.
Keyword Arguments:
ignore_warnings (bool): if true, proceed with classlist
processing even if there are warnings. Default False.
Returns:
tuple: if successful then "(True, clist)" where clist is a
list of dicts each with "id" and "name". On failure
"(False, warn_err)" where "warn_err" is a list of dicts of
warnings and errors. Each dict contains "warn_or_err" which is
'warning' or 'error', "werr_line" being the line number at
which the error occurs, and 'werr_text' being a string
describing the warning/error.
"""
student_csv_file_name = Path(student_csv_file_name)
if not student_csv_file_name.exists():
raise FileNotFoundError(f'Cannot find file "{student_csv_file_name}"')
vlad = PlomClasslistValidator()
if not vlad.check_is_canvas_csv(student_csv_file_name):
success, warn_err = vlad.validate_csv(student_csv_file_name, spec=spec)
if success is False:
# validation failed, return warning, error list
PlomClasslistValidator.print_classlist_warnings_errors(warn_err)
return (False, warn_err)
# validation passed but there are warnings
if warn_err:
PlomClasslistValidator.print_classlist_warnings_errors(warn_err)
if not ignore_warnings:
return (False, warn_err)
print("Continuing despite warnings")
df = process_classlist_backend(student_csv_file_name)
# "records" makes it output a list-of-dicts, one per row
return (True, df.to_dict("records"))