Skip to content
Snippets Groups Projects
Commit f12a59f9 authored by Gilares Paul
Browse files

Edit 2-remove_dup.py

parent fd8385b8
No related branches found
No related tags found
No related merge requests found
import pandas as pd
def remove_duplicates(csv_file, date):
    """Drop fully duplicated rows from a CSV file and save the result.

    The file is loaded with pandas, rows that exactly repeat an earlier row
    are removed, the row counts before and after are printed, and the
    cleaned table is written (without the index column) to both
    ``data/processed/Final{date}.csv`` and
    ``data/processed/Table{date}_no_dup.csv``. Both paths are relative to
    the current working directory.

    Args:
        csv_file: Path of the CSV file to read.
        date: Label inserted into the output file names and into the final
            log message.

    Returns:
        The de-duplicated ``pandas.DataFrame`` that was written to disk.
    """
    # Load the CSV file
    df = pd.read_csv(csv_file)

    # Remove duplicates: all columns are compared, first occurrence is kept.
    df_no_duplicates = df.drop_duplicates()

    # Save the file without duplicates
    df_no_duplicates.to_csv(f"data/processed/Final{date}.csv", index=False)

    # Report the row counts once, before and after, so the number of
    # removed rows can be read directly from the log.
    print(f"Nombre de lignes AVANT suppression des doublons : {len(df)}")
    print(f"Nombre de lignes APRES suppression des doublons : {len(df_no_duplicates)}")
    print("Duplicates removed and file saved")

    # Second copy of the same table under the "Table..._no_dup" name.
    df_no_duplicates.to_csv(f"data/processed/Table{date}_no_dup.csv", index=False)
    print(f"Fichier sauvegardé sans doublons pour {date}")

    return df_no_duplicates
# Years to process, as the four-character strings "2018" through "2024".
dates = [str(year) for year in range(2018, 2025)]
# NOTE(review): never read or appended to in the visible code; kept so the
# module-level name stays available.
processed_dates = []

# De-duplicate the normalized table of every year in turn.
for date in dates:
    print(f"Processing data for {date}")
    print(f"Suppression des doublons pour {date}")
    # Only the normalized table is used as input; an earlier assignment of
    # the "Filtre{date}_gps_corrected.csv" path was overwritten before it
    # was ever read, so it has been removed.
    csv_file = f"data/processed/Table{date}_normalized.csv"
    remove_duplicates(csv_file, date)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment