Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
Data Visualisation Quality Water
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Besson Lucas
Data Visualisation Quality Water
Commits
814e03e2
Commit
814e03e2
authored
2 months ago
by
Gilares Paul
Browse files
Options
Downloads
Patches
Plain Diff
Delete old useless function
parent
972bdd5f
Loading
Loading
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/1-processing.py
+0
-120
0 additions, 120 deletions
src/1-processing.py
with
0 additions
and
120 deletions
src/1-processing.py
deleted
100644 → 0
+
0
−
120
View file @
972bdd5f
import
pandas
as
pd
import
os
import
pyproj
def lambert93_to_wgs84(x, y):
    """Convert Lambert 93 (EPSG:2154) coordinates to WGS 84 latitude/longitude.

    Parameters
    ----------
    x, y : str or float
        Easting / northing in Lambert 93; values that cannot be parsed as
        numbers yield ``(None, None)``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` in WGS 84, or ``(None, None)`` on
        unparsable input.
    """
    try:
        x, y = float(x), float(y)
    except (TypeError, ValueError):
        # The original caught only ValueError, but float(None) raises
        # TypeError — handle both so a missing value cannot crash the caller.
        return None, None
    # Building a Transformer is expensive, and this function is called once
    # per DataFrame row via .apply() — cache the instance on the function
    # instead of recreating it on every call.
    transformer = getattr(lambert93_to_wgs84, "_transformer", None)
    if transformer is None:
        transformer = pyproj.Transformer.from_crs(
            "EPSG:2154", "EPSG:4326", always_xy=True
        )
        lambert93_to_wgs84._transformer = transformer
    lon, lat = transformer.transform(x, y)
    return lat, lon
def charger_donnees(date):
    """Load and clean the raw UDI_RES and UDI_PLV files for one month.

    Reads ``data/raw/UDI_RES_{date}.txt`` and ``data/raw/UDI_PLV_{date}.txt``,
    keeps the relevant columns, normalises the ``rqana`` values and converts
    the Lambert 93 coordinates, then writes the intermediate CSVs to
    ``data/processed/res{date}.csv`` and ``data/processed/plv{date}.csv``.

    Parameters
    ----------
    date : str
        Month identifier used in the file names (e.g. ``"202401"``).

    Returns
    -------
    bool
        ``True`` when both files were processed, ``False`` when a raw file
        is missing.
    """
    file_path_res = f"data/raw/UDI_RES_{date}.txt"
    file_path_plv = f"data/raw/UDI_PLV_{date}.txt"
    if not os.path.exists(file_path_res) or not os.path.exists(file_path_plv):
        print(f"Fichiers manquants pour {date}, passage...")
        return False

    # Ensure the output directory exists before writing — the original code
    # crashed with FileNotFoundError on a fresh checkout.
    os.makedirs("data/processed", exist_ok=True)

    # --- UDI_RES: analysis results --------------------------------------
    columns_res = [
        "cddept", "referenceprel", "cdparametre", "rsana",
        "cdunitereferencesiseeaux", "cdunitereference", "rqana",
        "rssigne", "representativite",
    ]
    data_res = pd.read_csv(file_path_res, sep=",", dtype=str)
    data_res = data_res[columns_res].copy()
    # Keep only the leading digits of 'rqana' (raw values may carry extra
    # text); anything non-numeric becomes NaN.
    data_res['rqana'] = pd.to_numeric(
        data_res['rqana'].str.extract(r'(\d+)', expand=False),
        errors='coerce',
    )
    data_res.to_csv(f"data/processed/res{date}.csv", index=False)

    # --- UDI_PLV: sampling metadata -------------------------------------
    columns_plv = [
        "cddept", "inseecommune", "nomcommune", "cdreseau", "cdpointsurv",
        "nompointsurv", "referenceprel", "dateprel", "finaliteprel",
        "conclusionprel", "cdtypeeau", "plvconformitebacterio",
        "plvconformitechimique", "plvconformiterefbacterio",
        "plvconformiterefchimique", "coord_x", "coord_y",
    ]
    data_plv = pd.read_csv(file_path_plv, sep=",", dtype=str)
    data_plv = data_plv[columns_plv].copy()
    # Convert Lambert 93 coordinates to WGS 84. Note: latitude ends up in
    # 'coord_x' and longitude in 'coord_y', matching the original behaviour.
    data_plv[['coord_x', 'coord_y']] = data_plv.apply(
        lambda row: lambert93_to_wgs84(row['coord_x'], row['coord_y']),
        axis=1,
        result_type='expand',
    )
    data_plv.to_csv(f"data/processed/plv{date}.csv", index=False)
    return True
def jointure(date):
    """Merge the intermediate RES and PLV tables for one month.

    Joins ``data/processed/res{date}.csv`` and ``data/processed/plv{date}.csv``
    on ``referenceprel`` (outer join), prints the merge diagnostics, drops the
    indicator column and writes the result to ``data/processed/Table{date}.csv``.

    Returns ``True`` on success, ``False`` when an intermediate file is missing.
    """
    res_path = f"data/processed/res{date}.csv"
    plv_path = f"data/processed/plv{date}.csv"
    if not (os.path.exists(res_path) and os.path.exists(plv_path)):
        print(f"Fichiers intermédiaires manquants pour {date}, passage...")
        return False

    frames = {
        "res": pd.read_csv(res_path, dtype=str),
        "plv": pd.read_csv(plv_path, dtype=str),
    }
    # Normalise the join key on both sides so stray whitespace cannot
    # silently break the match.
    for frame in frames.values():
        frame['referenceprel'] = frame['referenceprel'].astype(str).str.strip()

    merged = pd.merge(
        frames["res"],
        frames["plv"],
        on="referenceprel",
        how="outer",
        indicator=True,
    )
    # Diagnostic: how many rows matched on both sides vs. one side only.
    print(merged["_merge"].value_counts())

    merged.drop(columns=['_merge'], inplace=True)
    merged.to_csv(f"data/processed/Table{date}.csv", index=False)
    return True
def supprimer_donnees(date):
    """Delete the intermediate per-month CSVs once the joined table exists.

    Removes ``data/processed/res{date}.csv`` and ``data/processed/plv{date}.csv``
    only when ``data/processed/Table{date}.csv`` is present, so a failed join
    keeps its inputs around for inspection. Safe to call more than once.
    """
    table_path = f"data/processed/Table{date}.csv"
    if not os.path.exists(table_path):
        return
    for prefix in ("res", "plv"):
        path = f"data/processed/{prefix}{date}.csv"
        # Guard each removal: the original raised FileNotFoundError on a
        # second run, after the intermediates had already been deleted.
        if os.path.exists(path):
            os.remove(path)
# Months of 2024 to process, zero-padded: "202401" ... "202412".
# (The original built the same list with two separate string-concatenation
# comprehensions for months 1-9 and 10-12.)
dates = [f"2024{month:02d}" for month in range(1, 13)]

# Track the months whose joined table was produced successfully, so the
# final concatenation only picks up complete tables.
processed_dates = []
for date in dates:
    print(f"Processing data for {date}")
    if not charger_donnees(date):
        print(f"Chargement échoué pour {date}")
        continue
    if not jointure(date):
        print(f"Jointure échouée pour {date}")
        continue
    supprimer_donnees(date)
    processed_dates.append(date)
    print(f"Data for {date} processed")
def jointure_finale(dates=None):
    """Concatenate the monthly joined tables into one final table.

    Parameters
    ----------
    dates : list of str, optional
        Month identifiers whose ``data/processed/Table{date}.csv`` files
        should be concatenated. Defaults to the module-level
        ``processed_dates`` built by the processing loop, preserving the
        original (global-reading) behaviour while making the function
        testable and reusable.

    Writes ``data/processed/Table2024.csv``; does nothing (beyond a message)
    when none of the expected files exist.
    """
    if dates is None:
        dates = processed_dates
    paths = [f"data/processed/Table{date}.csv" for date in dates]
    valid_paths = [path for path in paths if os.path.exists(path)]
    if not valid_paths:
        print("Aucun fichier de jointure valide, fin du processus.")
        return
    data = pd.concat(
        [pd.read_csv(path, dtype=str) for path in valid_paths],
        ignore_index=True,
    )
    data.to_csv("data/processed/Table2024.csv", index=False)
    print("Table finale sauvegardée.")
jointure_finale
()
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment