step1 complete step2 started
This commit is contained in:
27
src/step1.py
27
src/step1.py
@@ -8,11 +8,11 @@ def check_export_file(path):
|
||||
result = chardet.detect(file.read())
|
||||
|
||||
detected_encoding = result['encoding']
|
||||
print(detected_encoding)
|
||||
# print(detected_encoding)
|
||||
|
||||
# Try: Datei in pandas einlesen
|
||||
try:
|
||||
return pd.read_csv(path, encoding=detected_encoding)
|
||||
return pd.read_csv(path, encoding=detected_encoding, sep=';')
|
||||
# Catch: zusätzliche Kommas entfernen
|
||||
except pd.errors.ParserError as e:
|
||||
# Wenn ein Parserfehler auftritt, gibt eine Fehlermeldung aus
|
||||
@@ -26,7 +26,7 @@ def check_export_file(path):
|
||||
print(f"Alle Kommas entfernt, einlesen wir erneut versucht ...")
|
||||
# Nach Komma Ersetzung erneut versuchen
|
||||
try:
|
||||
return pd.read_csv(path, encoding=detected_encoding)
|
||||
return pd.read_csv(path, encoding=detected_encoding, sep=';')
|
||||
except pd.errors.ParserError as e:
|
||||
print(f"Erneut Fehler in CSV-Datei: {e}")
|
||||
print(f"Datei muss manuell geändert werden.")
|
||||
@@ -35,16 +35,21 @@ def check_export_file(path):
|
||||
# zum Einlesen der bisherigen Systemdaten
|
||||
def create_dataframe_system(path):
    """Read the existing system data from a semicolon-separated CSV export.

    Parameters
    ----------
    path : str or path-like
        Location of the UTF-8 encoded, ';'-separated CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The parsed data, or ``None`` when the file cannot be parsed.
    """
    try:
        # 'utf-8' is the canonical codec name ('utf' is only an alias);
        # sep=';' matches the export format used throughout this project.
        return pd.read_csv(path, encoding='utf-8', sep=';')
    except pd.errors.ParserError as e:
        # Best-effort: report the problem and return None explicitly so the
        # caller can detect the failure (previously fell through implicitly).
        print(f"Fehler beim Einlesen der CSV")
        return None
|
||||
|
||||
|
||||
# zum Extrahieren von Test- und Funktionsusern
|
||||
def extract_testusers(dataframe, keywords):
    """Return the rows of *dataframe* in which any cell contains a keyword.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to scan; every cell is compared via its string representation,
        so numeric columns are searched as well.
    keywords : iterable of str
        Substrings that identify a test/function user.

    Returns
    -------
    pandas.DataFrame
        The matching rows (empty, with the original columns, if none match).
    """
    # Collect matching row labels first, then select once: this actually
    # returns the hits (the old loop only printed them and always returned
    # an empty frame) and avoids passing a list to Series.str.contains,
    # which raises a TypeError.
    hit_index = [
        idx
        for idx, row in dataframe.iterrows()
        if any(key in str(value) for value in row for key in keywords)
    ]
    return dataframe.loc[hit_index]
|
||||
def extract_testusers(df, keywords):
    """Return the rows of *df* in which any cell contains any of *keywords*.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to scan cell-by-cell (see ``contains_search_term``).
    keywords : iterable of str
        Substrings that identify a test/function user.

    Returns
    -------
    pandas.DataFrame
        Each matching row exactly once. The previous per-keyword
        ``pd.concat`` duplicated a row that matched several keywords;
        OR-ing the per-keyword masks fixes that.
    """
    if df.empty:
        # apply(axis=1) on an empty frame does not yield a usable mask
        return df.copy()
    mask = pd.Series(False, index=df.index)
    for keyword in keywords:
        # contains_search_term checks every cell of a row for the keyword
        mask |= df.apply(contains_search_term, axis=1, key=keyword)
    return df[mask]
|
||||
|
||||
|
||||
def contains_search_term(row, key):
    """Return True if *key* occurs in the string form of any cell of *row*.

    Parameters
    ----------
    row : iterable
        A row of values (e.g. a pandas Series); each value is stringified.
    key : str
        Substring to search for.

    Returns
    -------
    bool
        True on the first cell containing *key*, otherwise False.
    """
    return any(key in str(cell) for cell in row)
|
||||
|
||||
29
src/step2.py
Normal file
29
src/step2.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import pandas as pd
|
||||
from Levenshtein import distance
|
||||
|
||||
|
||||
def print_status(new, sys, bool_class):
    """Print a comparison summary of the import list against the system list.

    Parameters
    ----------
    new : pandas.DataFrame
        Freshly imported users; must contain 'name' and 'vorname' columns.
    sys : pandas.DataFrame
        Users currently in the system; must contain 'name' and 'vorname'.
        (Parameter name shadows the builtin ``sys`` module; kept for
        backward compatibility with existing callers.)
    bool_class : bool
        When True, the 'klasse' column is included in the printed matches.
        NOTE(review): assumes only one of the two frames carries 'klasse' —
        otherwise the merge suffixes it to 'klasse_x'/'klasse_y' and the
        selection raises a KeyError. Confirm against the callers.

    Returns
    -------
    None
        Output goes to stdout only.
    """
    print(f"\nEinträge in Import Liste: {len(new)}")
    print(f"Einträge in System Liste: {len(sys)}")

    # Both branches previously repeated these two merges verbatim and
    # differed only in the column selection — hoist the common part.
    merged_df = pd.merge(new, sys, on=['name', 'vorname'], how='outer', indicator=True)
    matches = pd.merge(new, sys, on=['name', 'vorname'])
    columns = ['name', 'vorname', 'klasse'] if bool_class else ['name', 'vorname']
    matches = matches[columns]

    # Rows present in only one of the two frames, per the merge indicator
    only_new = merged_df[merged_df['_merge'] == 'left_only'].drop(columns=['_merge'])
    only_sys = merged_df[merged_df['_merge'] == 'right_only'].drop(columns=['_merge'])

    print("\nAnzahl Übereinstimmungen:", len(matches))
    print("Anzahl neuer Nutzer:", len(only_new))
    print("Anzahl veralteter Nutzer:", len(only_sys))
    print(matches)
|
||||
|
||||
|
||||
def search_typos(new, sys):
|
||||
matches = pd.merge(new, sys, on=['name', 'vorname'])
|
||||
Reference in New Issue
Block a user