#!/usr/bin/env python
"""Human Activity Recognition benchmark.

Reads ``X_train.csv``, ``y_train.csv`` and ``X_test.csv`` from the data
folder, fits a ``RandomForestClassifier`` on the training data, predicts the
test instances and writes the predictions to a submission CSV with the
columns ``ID`` and ``TARGET``.
"""
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Folder that contains the CSV files. The value is a placeholder: replace it
# with the full path to the data before running the script.
mi_carpeta = 'rellenar_ruta_completa'


def main(data_dir=None, output_path='my_submission.csv'):
    """Train the benchmark classifier and write the submission file.

    Args:
        data_dir: Folder holding ``X_train.csv``, ``y_train.csv`` and
            ``X_test.csv``. Defaults to the module-level ``mi_carpeta``.
        output_path: Destination of the submission CSV.
    """
    if data_dir is None:
        data_dir = mi_carpeta

    print('\nHuman Activity Recognition benchmark.')

    # Read data.
    print('\nReading data...')
    X_train = pd.read_csv(os.path.join(data_dir, 'X_train.csv'))
    y_train = pd.read_csv(os.path.join(data_dir, 'y_train.csv'))
    X_test = pd.read_csv(os.path.join(data_dir, 'X_test.csv'))

    # Keep the test IDs: they are needed again for the submission file.
    test_id = X_test['ID']

    # The ID column is an identifier, not a feature, so drop it before
    # fitting and predicting.
    X_train = X_train.drop(['ID'], axis=1)
    X_test = X_test.drop(['ID'], axis=1)

    # Print some stats.
    print('\nTraining data: %d instances, %d features' % X_train.shape)
    print('Test data: %d instances, %d features' % X_test.shape)
    print("Unique target labels:", np.unique(y_train['TARGET']))

    # Extract the labels as a 1-D array, the shape fit() expects for y.
    # to_numpy() replaces Series.ravel(), which newer pandas deprecates.
    y_train = y_train['TARGET'].to_numpy()

    # Fixed random_state keeps the benchmark reproducible between runs.
    clf = RandomForestClassifier(random_state=0)
    clf.fit(X_train, y_train)

    # Predict labels for the test instances.
    y_pred = clf.predict(X_test)

    # Save the results as a submission file: one row per test ID.
    submission = pd.DataFrame(data={'ID': test_id, 'TARGET': y_pred})
    submission.to_csv(output_path, sep=',', index=False)


if __name__ == '__main__':
    main()