Skip to content
Snippets Groups Projects
Commit 63f93193 authored by Tebboune Amel
Browse files

pzfunzapf

parent 9f09034b
Branches
No related tags found
No related merge requests found
File added
File added
Source diff could not be displayed: it is too large. Options to address this: view the blob.
import pandas as pd
import numpy as np
'''
fichier pour centraliser toutes les transformations de données
'''
class PreprocessData:
    """Load the feature/target CSV files and apply the data transformations.

    On construction the ``patient_id`` column is dropped from the features
    and the categorical ``gene`` column is replaced by three numeric
    indicator columns (see :meth:`remplir_gene`).

    Attributes:
        X: feature table read from ``path_X``, transformed in place.
        y: target table read from ``path_y``, left untouched.
    """

    # Label standing for a gene whose value is missing.
    GENE_INCONNU = 'Inconnu'

    # Output column -> (gene label encoded as 1, value used for an unknown gene).
    # NOTE(review): the fractional values look like empirical frequencies of
    # each mutation -- confirm their origin before changing them.
    INDICATEURS_GENE = {
        'est_LRRK2+': ('LRRK2+', 0.4705),
        'est_GBA+': ('GBA+', 0.4080),
        'est_OTHER+': ('OTHER+', 0.1211),
    }

    def __init__(self, path_X, path_y):
        """Read both CSV files and run the transformations on the features.

        Args:
            path_X: location of the feature CSV, passed to ``pandas.read_csv``.
            path_y: location of the target CSV, passed to ``pandas.read_csv``.
        """
        self.X = pd.read_csv(path_X)
        self.y = pd.read_csv(path_y)
        self.virer_patient()
        self.remplir_gene()

    def valeurs_off(self) -> None:
        """Placeholder: no transformation is implemented here yet."""

    def virer_patient(self) -> None:
        """Drop the ``patient_id`` column from ``self.X`` in place.

        Raises:
            KeyError: if ``self.X`` has no ``patient_id`` column.
        """
        self.X.drop('patient_id', axis=1, inplace=True)

    def remplir_gene(self) -> pd.DataFrame:
        """Replace the ``gene`` column with one numeric column per mutation.

        For every entry of ``INDICATEURS_GENE`` a new column is added whose
        value is 1 when the gene equals the entry's label, the entry's
        fractional value when the gene is missing (or already labelled
        ``'Inconnu'``), and 0 otherwise. The ``gene`` column is then dropped.

        Returns:
            ``self.X`` itself, modified in place.

        Raises:
            KeyError: if ``self.X`` has no ``gene`` column (for example when
                the method is called a second time).
        """
        genes = self.X['gene']
        inconnu = self.GENE_INCONNU

        def encoder(valeur, label, valeur_inconnue):
            # pd.isna covers NaN, None and pd.NA, unlike a `type(x) == float`
            # test, which also misclassifies genuine float values.
            if pd.isna(valeur) or valeur == inconnu:
                return valeur_inconnue
            return 1 if valeur == label else 0

        for colonne, (label, valeur_inconnue) in self.INDICATEURS_GENE.items():
            self.X[colonne] = genes.apply(encoder, args=(label, valeur_inconnue))

        self.X.drop('gene', axis=1, inplace=True)
        return self.X

    def get_X(self) -> pd.DataFrame:
        """Return the (transformed) feature table ``self.X``."""
        return self.X
# Run the preprocessing on the training CSV files and print the first 30 rows
# of the resulting feature table.
# NOTE(review): these statements sit at module level, so they also execute
# whenever this module is imported -- consider an `if __name__ == "__main__":` guard.
preprocess4 = PreprocessData('data/X_train_6ZIKlTY.csv', 'data/y_train_lXj6X5y.csv')
print(preprocess4.get_X().head(30))
\ No newline at end of file
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Read the two CSV files under data/ into the DataFrames X and y.
X = pd.read_csv('data/X_train_6ZIKlTY.csv')
y = pd.read_csv('data/y_train_lXj6X5y.csv')
```
%% Cell type:code id: tags:
``` python
# Preview the first rows of X.
X.head()
```
%% Output
Index patient_id cohort sexM gene age_at_diagnosis age ledd \
0 0 IPLP5212 A 0 LRRK2+ 48.5 52.1 607.0
1 1 IPLP5212 A 0 LRRK2+ 48.5 53.0 666.0
2 2 IPLP5212 A 0 LRRK2+ 48.5 53.9 717.0
3 3 IPLP5212 A 0 LRRK2+ 48.5 54.8 770.0
4 4 IPLP5212 A 0 LRRK2+ 48.5 56.9 885.0
time_since_intake_on time_since_intake_off on off
0 1.9 NaN 7.0 NaN
1 1.9 17.6 12.0 44.0
2 1.2 NaN 6.0 NaN
3 1.5 NaN 11.0 NaN
4 0.3 NaN 24.0 NaN
%% Cell type:code id: tags:
``` python
# Count the missing values in the 'off' column.
X['off'].isna().sum()
```
%% Output
23407
%% Cell type:code id: tags:
``` python
from preprocess import PreprocessData
# Build the preprocessing object from the two CSV files under data/.
preprocess4 = PreprocessData('data/X_train_6ZIKlTY.csv', 'data/y_train_lXj6X5y.csv')
```
%% Cell type:code id: tags:
``` python
```
%% Output
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[9], line 1
----> 1 preprocess4.get_X()
AttributeError: 'PreprocessData' object has no attribute 'get_X'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment