From 65e67633956d3e3af9a1c3064d3f2f7ca828eb67 Mon Sep 17 00:00:00 2001 From: Patrick vom Hagen Date: Tue, 27 Aug 2024 13:04:25 +0200 Subject: [PATCH] anpassungen und verbesserungen --- main.py | 29 +++++++++++++------------- src/generate_commands.py | 2 ++ src/merge_xlsx_sheets.py | 2 +- src/step1.py | 44 ++++++++++++++++++++++++++++++++++++---- src/step2.py | 10 ++++----- src/step3.py | 4 +++- 6 files changed, 66 insertions(+), 25 deletions(-) diff --git a/main.py b/main.py index 343670a..d350500 100644 --- a/main.py +++ b/main.py @@ -38,9 +38,11 @@ ox_quota_sus = 5120 # oxUserQuota LuL if __name__ == "__main__": + school_folder = "7114SadW" + # Erstellt oder löscht Inhalte vorhandener Logdatei und loggt Konsolenausgaben - log_file_path = 'output/log.txt' - import_command_path = 'output/import_command.txt' + log_file_path = f'./Data/{school_folder}/gen_log.txt' + import_command_path = f'./Data/{school_folder}/gen_importCommand.txt' if os.path.exists(log_file_path): open(log_file_path, 'w').close() sys.stdout = Logger.Logger(log_file_path) @@ -53,14 +55,14 @@ if __name__ == "__main__": # ToDo Daten mit UI einlesen # lehrer_liste_neu = "./Data/SaM/export_lehrer_SaM.csv" - school_folder = "johanneum" + lehrer_liste_neu = f"./Data/{school_folder}/export_lul.csv" - lehrer_liste_system = f"./Data//{school_folder}/sys_lul.csv" - lul_out_path = 'output/outputLehrer.csv' + lehrer_liste_system = f"./Data/{school_folder}/sys_lul.csv" + lul_out_path = f'./Data/{school_folder}/gen_Lehrer.csv' schueler_liste_neu = f"./Data/{school_folder}/export_sus.csv" schueler_liste_system = f"./Data/{school_folder}/sys_sus.csv" - sus_out_path = 'output/outputSchueler.csv' + sus_out_path = f'./Data/{school_folder}/gen_Schueler.csv' # Variablen füllen: if not dev: @@ -76,9 +78,11 @@ if __name__ == "__main__": lul_new = step1.check_export_file(lehrer_liste_neu) lul_sys = step1.create_dataframe_system(lehrer_liste_system) + step1.check_quotas(lul_sys, mail_quota_lul, ox_quota_lul) sus_new = 
step1.check_export_file(schueler_liste_neu) sus_sys = step1.create_dataframe_system(schueler_liste_system) + step1.check_quotas(sus_sys, mail_quota_sus, ox_quota_sus) # Test- und Funktionsuser auslagern keywords = ['Test', 'test', 'Raum', 'raum', 'User', 'user', 'Tafel', 'tafel', 'Admin', 'admin'] @@ -91,23 +95,20 @@ if __name__ == "__main__": # Step 2 - auf name, vorname reduzieren und abgleichen # # Klassen Übersicht leeren - with open('./output/klassen.txt', 'w') as file: + with open(f'./Data/{school_folder}/gen_klassen.txt', 'w') as file: file.write('') - lul_exel_path = 'output/stats_lul.xlsx' - sus_exel_path = 'output/stats_sus.xlsx' + lul_exel_path = f'./Data/{school_folder}/gen_stats_lul.xlsx' + sus_exel_path = f'./Data/{school_folder}/gen_stats_sus.xlsx' print("\n Lehrer:innen:") - lul_matched, new_lul = step2.compare_data(lul_new, lul_sys, len(lul_testuser_df), lul_exel_path) + lul_matched, new_lul = step2.compare_data(lul_new, lul_sys, len(lul_testuser_df), lul_exel_path, school_folder) print("\n Schüler:innen:") - sus_matched, new_sus = step2.compare_data(sus_new, sus_sys, len(sus_testuser_df), sus_exel_path) + sus_matched, new_sus = step2.compare_data(sus_new, sus_sys, len(sus_testuser_df), sus_exel_path, school_folder) # ----------------------------------------------------------------------------------------# # Step 3 - Import Data generieren - klasse, uuids, weiteres in einer Liste zusammenführen # - # ToDo verfügbare Klassen listen und abgleichen - # ToDo Quota Abweichungen ermitteln und übernehmen - # Lul: Namen + UUIDs + Testuser step3.create_output_list('LuL', new_lul, lul_matched, dev, school_id, ox_context, mail_quota_lul, ox_quota_lul, lul_testuser_df, lul_out_path) diff --git a/src/generate_commands.py b/src/generate_commands.py index a60b5ef..762fdf9 100644 --- a/src/generate_commands.py +++ b/src/generate_commands.py @@ -10,3 +10,5 @@ def generate_commands(path, sid, context, short): file.write('# Ohne Dry Run ausführen') file.write('# 
Summary kopieren (sudo cp .)') + file.write('# Neue Listen in ucs-export generieren') + file.write('\n') diff --git a/src/merge_xlsx_sheets.py b/src/merge_xlsx_sheets.py index cb0e7be..96597c3 100644 --- a/src/merge_xlsx_sheets.py +++ b/src/merge_xlsx_sheets.py @@ -1,7 +1,7 @@ import pandas as pd # Laden Sie die Excel-Datei mit mehreren Seiten -excel_file = pd.ExcelFile('C:/Users/Patrick vom Hagen/Documents/Import/Falkenfled/Schüler und Schülerinnen gesamt.xls') +excel_file = pd.ExcelFile('C:/Users/Patrick vom Hagen/PycharmProjects/UCS_Import_Python/Data/5104OzD/OzD_Abendgymnasium.xlsx') dfs = [] for sheet_name in excel_file.sheet_names: diff --git a/src/step1.py b/src/step1.py index e410704..52be788 100644 --- a/src/step1.py +++ b/src/step1.py @@ -12,7 +12,9 @@ def check_export_file(path): # Try: Datei in pandas einlesen try: - return pd.read_csv(path, encoding=detected_encoding, sep=';') + df = pd.read_csv(path, encoding=detected_encoding, sep=';') + df = check_columns(df) + return df # Catch: zusätzliche Kommas entfernen except pd.errors.ParserError as e: # Wenn ein Parserfehler auftritt, gibt eine Fehlermeldung aus @@ -26,18 +28,45 @@ def check_export_file(path): print(f"Alle Kommas entfernt, einlesen wir erneut versucht ...") # Nach Komma Ersetzung erneut versuchen try: - return pd.read_csv(path, encoding=detected_encoding, sep=';') + df = pd.read_csv(path, encoding=detected_encoding, sep=';') + df = check_columns(df) + return df except pd.errors.ParserError as e: print(f"Erneut Fehler in CSV-Datei: {e}") print(f"Datei muss manuell geändert werden.") +# die Spalten benötigen fest definierte Namen, auch in Bezug auf Groß- und Kleinschreibung +# häufige Abweichungen automatisch anpassen: +def check_columns(data): + for column in data.columns: + if column == 'Nachname': + data = data.rename(columns={'Nachname': 'name'}) + if column == 'Name': + data = data.rename(columns={'Name': 'name'}) + if column == 'Vorname': + data = data.rename(columns={'Vorname': 'vorname'}) 
+ if column == 'Klasse': + data = data.rename(columns={'Klasse': 'klasse'}) + check_duplicates(data) + return data + + +def check_duplicates(df): + df['full_name'] = df['name'] + ' ' + df['vorname'] + + duplicates = df[df.duplicated('full_name', keep=False)] + if not duplicates.empty: + print("\nWARNUNG: Duplikate gefunden:") + print(duplicates) + + # zum Einlesen der bisherigen Systemdaten def create_dataframe_system(path): try: return pd.read_csv(path, encoding='utf', sep=';') except pd.errors.ParserError as e: - print(f"Fehler beim Einlesen der CSV") + print(f"Fehler {e} beim Einlesen der System CSV: {path}") # zum Extrahieren von Test- und Funktionsusern @@ -57,4 +86,11 @@ def contains_search_term(row, key): return False - +# Prüfen, ob erhöhte Quotas vergeben wurden +def check_quotas(data, mail_quota, ox_quota): + print("\n Quota Check:") + mail_quota_exceptions = data[data['mailUserQuota'] > mail_quota] + print(mail_quota_exceptions.drop(columns=['klasse', 'schuelerid', 'oxContext', 'oxUserQuota'], errors='ignore')) + ox_quota_exceptions = data[data['oxUserQuota'] > ox_quota] + print(ox_quota_exceptions.drop(columns=['klasse', 'schuelerid', 'oxContext', 'mailUserQuota'], errors='ignore')) + return data diff --git a/src/step2.py b/src/step2.py index 5c9d44b..619b280 100644 --- a/src/step2.py +++ b/src/step2.py @@ -3,7 +3,7 @@ import pandas as pd from Levenshtein import distance -def compare_data(new, sys, count_test, path): +def compare_data(new, sys, count_test, path, school): print(f"\nEinträge in System Liste: {len(sys)}") print(f"Einträge in Import Liste: {len(new)}") @@ -12,8 +12,8 @@ def compare_data(new, sys, count_test, path): if bool_class: if 'index' in new.columns: new = new.drop('index', axis=1) - unique_classes(new) - unique_classes(sys) + unique_classes(new, school) + unique_classes(sys, school) sys = sys.drop(columns=['klasse']) merged_df = pd.merge(new[['name', 'vorname', 'klasse']], sys, on=['name', 'vorname'], how='outer', indicator=True) matches = pd.merge(new, sys, 
on=['name', 'vorname']) @@ -47,11 +47,11 @@ def search_typos(new, sys): print('Mögliche Tippfehler: keine Fehler gefunden!') -def unique_classes(df): +def unique_classes(df, school): df['klasse'] = df['klasse'].str.split(',') df = df.explode('klasse') eindeutige_klassen = df['klasse'].unique() - with open('./output/klassen.txt', 'a') as file: + with open(f'./Data/{school}/gen_klassen.txt', 'a') as file: file.write(np.array_str(eindeutige_klassen)) # print(eindeutige_klassen) diff --git a/src/step3.py b/src/step3.py index 700fd3a..f847513 100644 --- a/src/step3.py +++ b/src/step3.py @@ -8,7 +8,8 @@ def create_uuid(): def add_hl_tag(sid, row): klasse = str(row['klasse']) - klasse = klasse.lstrip('0').lower() + klasse = klasse.lstrip('[').rstrip(']').replace('\'', '') + # klasse = klasse.lstrip('0').lower() if klasse != 'nan': return sid + '-' + klasse @@ -34,6 +35,7 @@ def add_school_data(df, sid, ox_context, mail_quota, ox_quota): def create_output_list(name, new, matched, dev, sid, oxc, mail_q, oxq, testuser, out_path): df = merch_uuids(new, matched, dev) df = add_school_data(df, sid, oxc, mail_q, oxq) + # print(df.drop(columns=['schuelerid', 'oxContext', 'mailUserQuota', 'full_name', 'oxUserQuota'])) df = pd.concat([df, testuser], ignore_index=True) df = df[['name', 'vorname', 'klasse', 'schuelerid', 'mailUserQuota', 'oxUserQuota', 'oxContext']] print(f"\n{len(testuser)} Testuser angefügt - {len(df)} Einträge in {name} Liste")