-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtsvm.py
104 lines (94 loc) · 3.18 KB
/
tsvm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
import pandas as pd
import subprocess
import os
from sklearn.datasets import dump_svmlight_file
import aux_functions as af
def create_folders(db_name, num_exp):
    """Create the directory tree that stores the artefacts of a TSVM run.

    The tree is rooted at ``output/<db_name>/tsvm`` relative to the current
    working directory. When ``num_exp`` is not ``None`` an additional
    ``<num_exp>`` level is inserted below ``tsvm``. Inside that root the
    ``files``, ``models`` and ``predictions`` directories are created.
    Directories that already exist are left untouched.

    Args:
        db_name: Name of the data set; used verbatim as a directory name.
        num_exp: Experiment identifier (converted with ``str``), or ``None``
            to place the three directories directly under ``tsvm``.
    """
    subpath = db_name + "/tsvm"
    if num_exp is not None:
        subpath += "/" + str(num_exp)
    # makedirs also creates every missing intermediate directory, so only the
    # three leaves have to be requested explicitly.
    for leaf in ("files", "models", "predictions"):
        os.makedirs("output/" + subpath + "/" + leaf, exist_ok=True)
def get_pred_values(path):
    """Read the predicted labels from a prediction file.

    The file is parsed as space-separated text without a header. Only the
    first field of every line is used; it is expected to have the form
    ``<value>:<label>`` and the integer ``<label>`` after the colon is
    returned.

    Args:
        path: Path of the prediction file to read.

    Returns:
        A ``pandas.Series`` of ``int`` labels, one per line of the file.
    """
    table = pd.read_csv(path, sep=' ', header=None)
    first_field = table[0]
    # No extra positional argument here: on a Series it would not mean
    # ``axis`` but bind to ``convert_dtype``, which pandas has deprecated.
    return first_field.apply(lambda token: int(token.split(':')[1]))
def ova_tsvm(x_l, y_l, x_u, y_u, db_name="tmp", num_exp=None, timeout=None):
    """Train one-vs-all transductive SVMs and predict the unlabeled examples.

    For every class index ``k`` in ``range(K)``, where ``K`` is the number of
    distinct values in ``y_l``, a binary problem is written in svmlight
    format, the external ``svm_learn`` binary is run on it, and the labels it
    writes for the unlabeled examples are read back. The per-class
    predictions are then merged with ``ova_voting``.

    NOTE(review): class labels are compared against ``0 .. K-1`` and the
    value ``-1`` in the training labels is re-encoded as ``0``; presumably
    ``af.partially_labeled_view`` marks unlabeled examples with ``-1`` --
    confirm against that helper.

    Args:
        x_l: Features of the labeled examples.
        y_l: Labels of the labeled examples.
        x_u: Features of the unlabeled examples.
        y_u: Labels of the unlabeled examples (passed through to
            ``af.partially_labeled_view``).
        db_name: Data set name, used to build the output directory.
        num_exp: Optional experiment identifier, used as an extra directory
            level when not ``None``.
        timeout: Timeout in seconds applied to each ``svm_learn`` run
            separately; ``None`` waits indefinitely.

    Returns:
        A tuple ``(y_u_shuffled, y_pred)`` with the third value returned by
        ``af.partially_labeled_view`` and the voted class index per example.

    Raises:
        TimeoutError: If a ``svm_learn`` run exceeds ``timeout``.
        subprocess.CalledProcessError: If ``svm_learn`` exits with a
            non-zero status.
    """
    n_classes = len(np.unique(y_l))
    x_train, y_train, y_u_shuffled = af.partially_labeled_view(x_l, y_l, x_u, y_u)

    # create folders for storing results
    create_folders(db_name, num_exp)
    subpath = db_name + "/tsvm"
    if num_exp is not None:
        subpath += "/" + str(num_exp)

    labels = np.asarray(y_train).ravel()
    is_unmarked = labels == -1  # loop-invariant, computed once

    ovapreds = []
    for k in range(n_classes):
        # +1 for class k, 0 for entries marked -1, -1 for every other class
        y_train_k = np.where(labels == k, 1, np.where(is_unmarked, 0, -1))
        path_file = "output/" + subpath + "/files/df_class_" + str(k)
        path_model = "output/" + subpath + "/models/model_class_" + str(k)
        path_prediction = "output/" + subpath + "/predictions/pred_class_" + str(k)
        dump_svmlight_file(x_train, y_train_k, path_file, zero_based=False)

        # form a run command to create process of learning tsvm
        cmd = form_cmd(path_file, path_model, path_prediction)
        try:
            # check=True: a failed run must not fall through to reading a
            # prediction file left over from an earlier experiment.
            subprocess.run(cmd, stdout=subprocess.PIPE, timeout=timeout, check=True)
        except subprocess.TimeoutExpired as err:
            raise TimeoutError("The algorithm has not converged!") from err
        ovapreds.append(get_pred_values(path_prediction))

    # rows: examples, columns: per-class binary predictions
    ovapreds = np.array(ovapreds).T
    y_pred = np.apply_along_axis(ova_voting, 1, ovapreds)
    return y_u_shuffled, y_pred
def ova_voting(preds_for_x):
    """Pick a class index from the one-vs-all predictions of one example.

    Args:
        preds_for_x: Sequence of per-class binary predictions; position ``i``
            holds the prediction of the classifier for class ``i``.

    Returns:
        An index drawn uniformly at random (via ``np.random.choice``) among
        the positions whose prediction equals ``1``; when no position equals
        ``1`` the index is drawn among all positions.
    """
    winners = [pos for pos, vote in enumerate(preds_for_x) if vote == 1]
    # No classifier claimed the example: fall back to every class index.
    candidates = winners if winners else np.arange(len(preds_for_x))
    return np.random.choice(candidates, 1)[0]
def form_cmd(path_file, path_model, path_prediction):
    """Build the argument list that launches the ``svm_learn`` binary.

    Args:
        path_file: Path of the training data file.
        path_model: Path where the learned model is written.
        path_prediction: Path passed to ``-l``; the classification of the
            unlabelled examples is written there.

    Returns:
        A list of strings suitable for ``subprocess.run``.
    """
    # Further options that can be tried: '-n', '5' and '-e', '0.01'.
    return [
        './svm_light/svm_learn',  # binary for learning
        '-l', path_prediction,    # classify unlabelled examples to this file
        path_file,                # data path
        path_model,               # a file for the learning model
    ]