step3.py updated
This commit is contained in:
11
src/step2.py
11
src/step2.py
@@ -2,10 +2,12 @@ import pandas as pd
|
||||
from Levenshtein import distance
|
||||
|
||||
|
||||
def compare_data(new, sys, bool_class):
|
||||
def compare_data(new, sys):
|
||||
print(f"\nEinträge in Import Liste: {len(new)}")
|
||||
print(f"Einträge in System Liste: {len(sys)}")
|
||||
|
||||
bool_class = 'klasse' in new.columns
|
||||
|
||||
if bool_class:
|
||||
if 'index' in new.columns:
|
||||
new = new.drop('index', axis=1)
|
||||
@@ -35,9 +37,10 @@ def search_typos(new, sys):
|
||||
for idx1, row1 in new.iterrows():
|
||||
for idx2, row2 in sys.iterrows():
|
||||
if distance(row1[col1], row2[col1]) <= 2 and distance(row1[col2], row2[col2]) <= 2:
|
||||
typos.append((row1[col1], row1[col2], row2[col1], row2[col2]))
|
||||
if len(typos) > 0:
|
||||
print('Mögliche Tippfehler:', len(typos), '\n', typos)
|
||||
typos.append(([row1[col1], row1[col2]], [row2[col1], row2[col2]]))
|
||||
typo_df = pd.DataFrame(typos, columns=['Import', 'System'])
|
||||
if len(typo_df) > 0:
|
||||
print('Mögliche Tippfehler:', len(typo_df), '\n', typo_df)
|
||||
else:
|
||||
print('Mögliche Tippfehler: keine Fehler gefunden!')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user