changes bei EGS Import
This commit is contained in:
10
main.py
10
main.py
@@ -4,13 +4,13 @@ if __name__ == "__main__":
|
|||||||
oldlist = 'Data/alte-liste-utf.csv'
|
oldlist = 'Data/alte-liste-utf.csv'
|
||||||
newlist = 'Data/neue-liste-utf.csv'
|
newlist = 'Data/neue-liste-utf.csv'
|
||||||
|
|
||||||
test = 'GGS/downloadS.csv'
|
test = 'GPS/lehrer_HL0707113.csv_intern'
|
||||||
new_test = 'GGS/ggsSold.csv'
|
new_test = 'GPS/gpsLold.csv'
|
||||||
clean = 'GGS/ggsSold2.cvs'
|
clean = 'GPS/gpsLold2.cvs'
|
||||||
|
|
||||||
# Liste nach Fehler prüfen und Zeichen ersetzen
|
# Liste nach Fehler prüfen und Zeichen ersetzen
|
||||||
# check_file(test)
|
check_file(test)
|
||||||
# format_csv(test, new_test)
|
format_csv(test, new_test)
|
||||||
clean_data(new_test, clean)
|
clean_data(new_test, clean)
|
||||||
|
|
||||||
# path_old_csv = input('Pfad zur alten Liste eingeben eingeben: ')
|
# path_old_csv = input('Pfad zur alten Liste eingeben eingeben: ')
|
||||||
|
|||||||
140
src/compareL.py
Normal file
140
src/compareL.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
import csv
|
||||||
|
from Levenshtein import distance
|
||||||
|
import pandas as pd
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
# TODO Filter für Spalten, ggfs. Klasse benötigt
|
||||||
|
|
||||||
|
# TODO Filter für Duplikate hier wird dann die Klasse benötigt
|
||||||
|
|
||||||
|
|
||||||
|
def read_csv(file_path):
    """Read a semicolon-delimited CSV and return a list of (name, vorname) tuples.

    Only the first two columns of each row are used; both values are stripped
    of surrounding whitespace.

    Args:
        file_path: path to a ';'-delimited, UTF-8 encoded CSV file.

    Returns:
        list of 2-tuples (col0, col1), one per usable row.
    """
    data = []
    with open(file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile, delimiter=';')
        for row in reader:
            # A blank line yields an empty row and a malformed line may have
            # a single field; both would raise IndexError on row[1] — skip them.
            if len(row) < 2:
                continue
            data.append((row[0].strip(), row[1].strip()))
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def similar_sets(pair, data, max_distance=1):
    """Return all items of *data* fuzzily matching *pair*.

    An item matches when BOTH of its fields are within *max_distance*
    Levenshtein edits of the corresponding field of *pair*.

    Args:
        pair: (name, vorname) tuple to compare against.
        data: iterable of (name, vorname) tuples.
        max_distance: inclusive per-field edit-distance threshold; the
            default of 1 preserves the original hard-coded behavior.

    Returns:
        list of matching tuples, in iteration order of *data*.
    """
    return [
        item
        for item in data
        if distance(pair[0], item[0]) <= max_distance
        and distance(pair[1], item[1]) <= max_distance
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def compare_csv(file1, file2):
    """Compare the (name, vorname) pairs of two CSV files.

    Returns a 5-tuple:
        common_pairs  -- set of pairs present in both files
        unique_pairs1 -- set of pairs only in file1
        unique_pairs2 -- set of pairs only in file2
        data1, data2  -- the raw lists as read from each file
    """
    data1 = read_csv(file1)
    data2 = read_csv(file2)

    # Build each set once instead of re-converting per operation.
    set1 = set(data1)
    set2 = set(data2)
    common_pairs = set1 & set2

    return common_pairs, set1 - common_pairs, set2 - common_pairs, data1, data2
|
||||||
|
|
||||||
|
|
||||||
|
def find_similar_pairs(pair, other_data, max_distance=2):
    """Return all items of *other_data* fuzzily matching *pair*.

    An item matches when BOTH of its fields are within *max_distance*
    Levenshtein edits of the corresponding field of *pair*.

    Args:
        pair: (name, vorname) tuple to compare against.
        other_data: iterable of (name, vorname) tuples.
        max_distance: inclusive per-field edit-distance threshold; the
            default of 2 preserves the original hard-coded behavior.

    Returns:
        list of matching tuples, in iteration order of *other_data*.
    """
    return [
        item
        for item in other_data
        if distance(pair[0], item[0]) <= max_distance
        and distance(pair[1], item[1]) <= max_distance
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def create_uuid():
    """Generate a fresh random UUID (version 4) and return it as a string."""
    return f"{uuid.uuid4()}"
|
||||||
|
|
||||||
|
|
||||||
|
def add_hl_tag(row):
    """Build the tagged class name for a row: 'HL0707104-' + klasse.

    Leading zeros are removed from the class value, e.g. klasse '07a'
    becomes 'HL0707104-7a'.

    NOTE(review): lstrip('0') maps a klasse of '0' or '00' to the empty
    string, producing just 'HL0707104-' — confirm such values cannot occur.
    """
    klasse = str(row['klasse'])
    return 'HL0707104-' + klasse.lstrip('0')
|
||||||
|
|
||||||
|
|
||||||
|
def create_import_list(path, path_new, old_pairs, new_pairs, common_pairs):
    """Build the teacher import CSV from the system export and the new list.

    Args:
        path: path to the current system export CSV (';'-delimited, UTF-8).
        path_new: path to the new list CSV (';'-delimited, UTF-8).
        old_pairs: (name, vorname) pairs that are outdated — removed from the
            system data.
        new_pairs: (name, vorname) pairs that are new — excluded from the
            class-matching step.
        common_pairs: (name, vorname) pairs present in both lists — excluded
            from the freshly-created entries.

    Side effects:
        Writes '../GPS/outputLehrer.csv' and prints progress information.
    """
    # Drop outdated entries from the system export.
    system_data = pd.read_csv(path, sep=';', encoding='utf-8')
    system_data = system_data[~system_data[['name', 'vorname']].apply(tuple, axis=1).isin(old_pairs)]

    new_data = pd.read_csv(path_new, sep=';', encoding='utf-8')

    # Bei Schüler: alte Klassen gelöscht, mit neuen Klassen aus new-data auffüllen
    matches = new_data[~new_data[['name', 'vorname']].apply(tuple, axis=1).isin(new_pairs)]
    system_data = pd.merge(system_data, matches, how='outer', on=['name', 'vorname'])
    # Select then drop 'klasse': net effect keeps name/vorname/schuelerid but
    # raises early if any expected column is missing after the merge.
    system_data = system_data[['name', 'vorname', 'klasse', 'schuelerid']]
    system_data = system_data.drop('klasse', axis=1, errors='ignore')
    print(system_data.columns)
    print("Passende Einträge:" + str(len(system_data)))

    # Keep only entries that are not already known, and assign each a new id.
    new_data = new_data[~new_data[['name', 'vorname']].apply(tuple, axis=1).isin(common_pairs)]
    new_uuids = [create_uuid() for _ in range(len(new_data))]
    new_data.insert(loc=2, column='schuelerid', value=new_uuids)

    # Account-provisioning defaults for teachers.
    mailUserQuota = 2048
    oxUserQuota = 20480
    oxContext = 25

    print(new_data.columns)
    print("New Data:" + str(len(new_data)))

    # Merge retained system entries with the freshly-created ones, then fill
    # in the provisioning defaults for every row.
    import_df = pd.merge(system_data, new_data, how='outer',
                         on=['name', 'vorname', 'schuelerid'])
    import_df['mailUserQuota'] = mailUserQuota
    import_df['oxUserQuota'] = oxUserQuota
    import_df['oxContext'] = oxContext
    # Teachers carry no class assignment in the import format.
    import_df['klasse'] = None
    import_df = import_df[['name', 'vorname', 'klasse', 'schuelerid', 'mailUserQuota', 'oxUserQuota', 'oxContext']]

    out_path = '../GPS/outputLehrer.csv'
    import_df.to_csv(out_path, sep=';', index=False)
    print(len(import_df))
    print('Lehrer Import Liste erzeugt')
    print('Testuser manuell nachtragen!!')
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Compare the new teacher list against the cleaned system export,
    report the differences, and produce the import CSV."""
    file1_path = '../GPS/gpsLnew.csv'
    file2_path = '../GPS/gpsLold2.cvs'
    file3_path = '../GPS/gpsLold.csv'

    common_pairs, new_pairs, old_pairs, data1, data2 = compare_csv(file1_path, file2_path)

    print(f"Anzahl der übereinstimmenden Paare: {len(common_pairs)}")
    print(f"Anzahl der neuen Einträge: {len(new_pairs)}")
    print(f"Anzahl der veralteten Einträge: {len(old_pairs)}")

    # Paare nur aus nicht zugeordneten Paaren aus neuer Liste erstellen
    # Report near-matches: entries of the new list that closely resemble an
    # entry already in the system (possible typos or renames).
    system_set = set(data2)
    for pair in data1:
        hits = find_similar_pairs(pair, system_set - {pair})
        if hits:
            print(f"Ähnliche Paare in neuer Liste {pair} aktuell im System: {hits}")

    create_import_list(file3_path, file1_path, old_pairs, new_pairs, common_pairs)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point.
if __name__ == "__main__":
    main()
|
||||||
@@ -55,12 +55,12 @@ def add_hl_tag(row):
|
|||||||
|
|
||||||
|
|
||||||
def create_import_list(path, path_new, old_pairs, new_pairs, common_pairs):
|
def create_import_list(path, path_new, old_pairs, new_pairs, common_pairs):
|
||||||
df = pd.read_csv(path, sep=';', encoding='utf-8')
|
system_data = pd.read_csv(path, sep=';', encoding='utf-8')
|
||||||
df = df[~df[['name', 'vorname']].apply(tuple, axis=1).isin(old_pairs)]
|
system_data = system_data[~system_data[['name', 'vorname']].apply(tuple, axis=1).isin(old_pairs)]
|
||||||
df = df.drop('username', axis=1, errors='ignore')
|
system_data = system_data.drop('username', axis=1, errors='ignore')
|
||||||
df = df.drop('klasse', axis=1, errors='ignore')
|
system_data = system_data.drop('klasse', axis=1, errors='ignore')
|
||||||
# print(len(df))
|
# print(len(system_data))
|
||||||
# print(df)
|
# print(system_data)
|
||||||
new_data = pd.read_csv(path_new, sep=';', encoding='utf-8')
|
new_data = pd.read_csv(path_new, sep=';', encoding='utf-8')
|
||||||
|
|
||||||
# Bei Schüler: alte Klassen gelöscht, mit neuen Klassen aus new-data auffüllen
|
# Bei Schüler: alte Klassen gelöscht, mit neuen Klassen aus new-data auffüllen
|
||||||
@@ -69,11 +69,10 @@ def create_import_list(path, path_new, old_pairs, new_pairs, common_pairs):
|
|||||||
matches.loc[:, 'klasse'] = matches.apply(add_hl_tag, axis=1)
|
matches.loc[:, 'klasse'] = matches.apply(add_hl_tag, axis=1)
|
||||||
# print(len(matches))
|
# print(len(matches))
|
||||||
# print(matches)
|
# print(matches)
|
||||||
df = pd.merge(df, matches, how='outer', left_on=['name', 'vorname'], right_on=['name', 'vorname'])
|
system_data = pd.merge(system_data, matches, how='outer', left_on=['name', 'vorname'], right_on=['name', 'vorname'])
|
||||||
df = df[['name', 'vorname', 'klasse', 'schuelerid', 'mailUserQuota', 'oxUserQuota', 'oxContext']]
|
system_data = system_data[['name', 'vorname', 'klasse', 'schuelerid']]
|
||||||
# print(df)
|
# print(system_data)
|
||||||
print(len(df))
|
print(len(system_data))
|
||||||
|
|
||||||
|
|
||||||
new_data = new_data[~new_data[['name', 'vorname']].apply(tuple, axis=1).isin(common_pairs)]
|
new_data = new_data[~new_data[['name', 'vorname']].apply(tuple, axis=1).isin(common_pairs)]
|
||||||
# new_data = new_data.drop('Unnamed: 2', axis=1, errors='ignore')
|
# new_data = new_data.drop('Unnamed: 2', axis=1, errors='ignore')
|
||||||
@@ -86,23 +85,31 @@ def create_import_list(path, path_new, old_pairs, new_pairs, common_pairs):
|
|||||||
new_data.insert(loc=2, column='schuelerid', value=new_uuids)
|
new_data.insert(loc=2, column='schuelerid', value=new_uuids)
|
||||||
mailUserQuota = 1024
|
mailUserQuota = 1024
|
||||||
oxUserQuota = 5120
|
oxUserQuota = 5120
|
||||||
oxContext = 16
|
oxContext = 25
|
||||||
new_data['mailUserQuota'] = mailUserQuota
|
|
||||||
new_data['oxUserQuota'] = oxUserQuota
|
|
||||||
new_data['oxContext'] = oxContext
|
|
||||||
# print(new_data)
|
# print(new_data)
|
||||||
print(len(new_data))
|
print(len(new_data))
|
||||||
|
|
||||||
# vor dem merge daten ergänzen
|
# vor dem merge daten ergänzen
|
||||||
# import_df = pd.merge(df, new_data, how='outer', left_on=['name', 'vorname', 'mailUserQuota', 'oxUserQuota', 'oxContext'], right_on=['name', 'vorname', 'mailUserQuota', 'oxUserQuota', 'oxContext'])
|
# import_df = pd.merge(system_data, new_data, how='outer', left_on=['name', 'vorname', 'mailUserQuota', 'oxUserQuota', 'oxContext'], right_on=['name', 'vorname', 'mailUserQuota', 'oxUserQuota', 'oxContext'])
|
||||||
import_df = pd.merge(df, new_data, how='outer', left_on=['name', 'vorname', 'klasse', 'schuelerid', 'mailUserQuota', 'oxUserQuota', 'oxContext'], right_on=['name', 'vorname', 'klasse', 'schuelerid', 'mailUserQuota', 'oxUserQuota', 'oxContext'])
|
# import_df = pd.merge(system_data, new_data, how='outer',
|
||||||
|
# left_on=['name', 'vorname', 'klasse', 'schuelerid', 'mailUserQuota', 'oxUserQuota',
|
||||||
|
# 'oxContext'],
|
||||||
|
# right_on=['name', 'vorname', 'klasse', 'schuelerid', 'mailUserQuota', 'oxUserQuota',
|
||||||
|
# 'oxContext'])
|
||||||
|
import_df = pd.merge(system_data, new_data, how='outer',
|
||||||
|
left_on=['name', 'vorname', 'klasse', 'schuelerid'],
|
||||||
|
right_on=['name', 'vorname', 'klasse', 'schuelerid'])
|
||||||
|
import_df['mailUserQuota'] = mailUserQuota
|
||||||
|
import_df['oxUserQuota'] = oxUserQuota
|
||||||
|
import_df['oxContext'] = oxContext
|
||||||
|
|
||||||
# pd.set_option('display.max_rows', None)
|
# pd.set_option('display.max_rows', None)
|
||||||
# pd.set_option('display.max_columns', None)
|
# pd.set_option('display.max_columns', None)
|
||||||
# print(import_df)
|
# print(import_df)
|
||||||
# print(len(import_df))
|
|
||||||
# pd.reset_option('display.max_rows')
|
# pd.reset_option('display.max_rows')
|
||||||
# pd.reset_option('display.max_columns')
|
# pd.reset_option('display.max_columns')
|
||||||
out_path = '../GGS/outputSchueler.csv'
|
out_path = '../GPS/outputSchueler.csv'
|
||||||
import_df.to_csv(out_path, sep=';', index=False)
|
import_df.to_csv(out_path, sep=';', index=False)
|
||||||
print(len(import_df))
|
print(len(import_df))
|
||||||
print('Schüler Import Liste erzeugt')
|
print('Schüler Import Liste erzeugt')
|
||||||
@@ -110,9 +117,9 @@ def create_import_list(path, path_new, old_pairs, new_pairs, common_pairs):
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
file1_path = '../GGS/ggsSnew.csv' # Pfad zur ersten CSV-Datei
|
file1_path = '../GPS/gpsSnew.csv'
|
||||||
file2_path = '../GGS/ggsSold2.cvs' # Pfad zur zweiten CSV-Datei
|
file2_path = '../GPS/gpsSold2.cvs'
|
||||||
file3_path = '../GGS/downloadS.csv'
|
file3_path = '../GPS/gpsSold.csv'
|
||||||
|
|
||||||
common_pairs, new_pairs, old_pairs, data1, data2 = compare_csv(file1_path, file2_path)
|
common_pairs, new_pairs, old_pairs, data1, data2 = compare_csv(file1_path, file2_path)
|
||||||
|
|
||||||
@@ -120,11 +127,12 @@ def main():
|
|||||||
print(f"Anzahl der neuen Einträge: {len(new_pairs)}")
|
print(f"Anzahl der neuen Einträge: {len(new_pairs)}")
|
||||||
print(f"Anzahl der veralteten Einträge: {len(old_pairs)}")
|
print(f"Anzahl der veralteten Einträge: {len(old_pairs)}")
|
||||||
|
|
||||||
|
# Paare nur aus nicht zugeordneten Paaren aus neuer Liste erstellen
|
||||||
# for pair in data1:
|
# for pair in data1:
|
||||||
# similar_pairs_list2 = find_similar_pairs(pair, set(data2) - {pair})
|
# similar_pairs_list2 = find_similar_pairs(pair, set(data2) - {pair})
|
||||||
#
|
#
|
||||||
# if similar_pairs_list2:
|
# if similar_pairs_list2:
|
||||||
# print(f"Ähnliche Paare zu {pair} in Liste 2: {similar_pairs_list2}")
|
# print(f"Ähnliche Paare in neuer Liste {pair} aktuell im System: {similar_pairs_list2}")
|
||||||
|
|
||||||
create_import_list(file3_path, file1_path, old_pairs, new_pairs, common_pairs)
|
create_import_list(file3_path, file1_path, old_pairs, new_pairs, common_pairs)
|
||||||
|
|
||||||
Reference in New Issue
Block a user