Skip to content
Snippets Groups Projects
Commit b5a9ba8e authored by atebboun
Browse files

hop

parent 7f0f0a49
Branches
No related tags found
No related merge requests found
No preview for this file type
Source diff could not be displayed: it is too large. Options to address this: view the blob.
...@@ -10,9 +10,9 @@ fichier pour centraliser toutes les transformations de données ...@@ -10,9 +10,9 @@ fichier pour centraliser toutes les transformations de données
''' '''
class PreprocessData: class PreprocessData:
def __init__(self,path_X,path_y): def __init__(self,path_X,path_y):
self.X=pd.read_csv(path_X) self.X=path_X
self.y = pd.read_csv(path_y) self.y = path_y
self.enlever_index() self.enlever_index()
...@@ -20,6 +20,8 @@ class PreprocessData: ...@@ -20,6 +20,8 @@ class PreprocessData:
#self.virer_patient() #self.virer_patient()
self.encoder_cohort() self.encoder_cohort()
self.imputer_age_at_diagnosis() self.imputer_age_at_diagnosis()
#self.rajout_feature_temps()
self.encoder_patient()
...@@ -86,7 +88,32 @@ class PreprocessData: ...@@ -86,7 +88,32 @@ class PreprocessData:
''' '''
Pour capturer la relation temporelle Pour capturer la relation temporelle
''' '''
pass
# rajouter le numéro de la visite
self.X['num_visite'] = self.X.groupby('patient_id').cumcount() + 1
# rajouter le nombre de visite total
self.X['nb_visites'] = self.X.groupby('patient_id')['num_visite'].transform('max')
# rajouter la progression du score on et off depuis la dernière visite
self.X['diff_on'] = self.X.groupby('patient_id')['on'].diff()
self.X['diff_off'] = self.X.groupby('patient_id')['off'].diff()
# rajouter la progression du score on et off depuis la première visite
self.X['diff_on_first'] = self.X.groupby('patient_id')['on'].transform('first')
self.X['diff_off_first'] = self.X.groupby('patient_id')['off'].transform('first')
# rajouter la moyenne du score on et off sur toutes les visites
self.X['mean_on'] = self.X.groupby('patient_id')['on'].transform('mean')
self.X['mean_off'] = self.X.groupby('patient_id')['off'].transform('mean')
# rajouter l'écart type du score on et off sur toutes les visites
self.X['std_on'] = self.X.groupby('patient_id')['on'].transform('std')
self.X['std_off'] = self.X.groupby('patient_id')['off'].transform('std')
# rajouter le temps depuis la dernière visite
self.X['time_since_last_visit'] = self.X.groupby('patient_id')['age'].diff()
def imputer_age_at_diagnosis(self): def imputer_age_at_diagnosis(self):
""" """
Impute les valeurs manquantes de age_at_diagnosis en utilisant une régression linéaire. Impute les valeurs manquantes de age_at_diagnosis en utilisant une régression linéaire.
......
...@@ -226,7 +226,7 @@ ...@@ -226,7 +226,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 50,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -236,7 +236,7 @@ ...@@ -236,7 +236,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 45, "execution_count": 51,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
...@@ -255,7 +255,7 @@ ...@@ -255,7 +255,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 46, "execution_count": 52,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
...@@ -277,7 +277,7 @@ ...@@ -277,7 +277,7 @@
"dtype: float64" "dtype: float64"
] ]
}, },
"execution_count": 46, "execution_count": 52,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment