Step 1 complete, step 2 started

This commit is contained in:
Patrick
2024-02-27 19:16:32 +01:00
parent 3b407bd956
commit 0632b1a7e1
5 changed files with 204 additions and 97 deletions

29
src/step2.py Normal file
View File

@@ -0,0 +1,29 @@
import pandas as pd
from Levenshtein import distance
def print_status(new, sys, bool_class):
    """Print a comparison summary of the import list and the system list.

    Both frames are joined on the ``name`` and ``vorname`` columns. The
    function prints the size of each list, then the number of rows found
    in both frames, only in ``new`` and only in ``sys``, and finally the
    matching rows themselves.

    Args:
        new: DataFrame with the import list; needs ``name`` and
            ``vorname`` columns.
        sys: DataFrame with the system list; needs ``name`` and
            ``vorname`` columns.
        bool_class: When truthy, the ``klasse`` column is printed next to
            the names of the matching rows.

    Returns:
        None. Everything is written to stdout.
    """
    join_keys = ['name', 'vorname']

    print(f"\nEinträge in Import Liste: {len(new)}")
    print(f"Einträge in System Liste: {len(sys)}")

    # The joins do not depend on ``bool_class``; only the columns shown for
    # the matches do, so both merges are computed exactly once.
    merged_df = pd.merge(new, sys, on=join_keys, how='outer', indicator=True)
    matches = pd.merge(new, sys, on=join_keys)

    # NOTE(review): if both frames carry a ``klasse`` column, the merge
    # suffixes them (``klasse_x``/``klasse_y``) and this selection raises
    # KeyError - confirm that only one of the inputs has that column.
    shown_columns = (join_keys + ['klasse']) if bool_class else join_keys
    matches = matches[shown_columns]

    # Build subsets for rows that exist in only one of the two DataFrames.
    only_new = merged_df[merged_df['_merge'] == 'left_only'].drop(columns=['_merge'])
    only_sys = merged_df[merged_df['_merge'] == 'right_only'].drop(columns=['_merge'])

    print("\nAnzahl Übereinstimmungen:", len(matches))
    print("Anzahl neuer Nutzer:", len(only_new))
    print("Anzahl veralteter Nutzer:", len(only_sys))
    print(matches)
def search_typos(new, sys):
matches = pd.merge(new, sys, on=['name', 'vorname'])