test-feature-tool/test.py
2020-03-31 18:05:47 +07:00

54 lines
1.7 KiB
Python

import json
import random
import numpy as np
# Load the feature records used by the experiments below.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding (which is not UTF-8 everywhere).
with open("features_1k.json", encoding="utf-8") as file_json:
    data = json.load(file_json)
print(len(data))
# In-place, unseeded shuffle: the test/train split differs between runs.
random.shuffle(data)
def calculate_ratio(split_ratio, thresh_hold, samples=None):
    """Return the fraction of test samples that lie far from every train sample.

    The first ``int(len(samples) * split_ratio)`` records form the test set and
    the remaining records form the train set. For each test feature vector the
    Euclidean distance to its nearest train feature vector is computed; the
    result is the share of test samples whose nearest distance is strictly
    greater than ``thresh_hold``. A one-line summary is printed as well.

    Args:
        split_ratio: Fraction of the records to use as the test set.
        thresh_hold: Distance above which a test sample counts as "far".
        samples: Sequence of records, each exposing a ``"feature"`` entry that
            holds a 1-D numeric vector. Defaults to the module-level ``data``.

    Returns:
        ``far_count / test_count`` as a float, or ``0.0`` when the test set is
        empty (there is nothing that could exceed the threshold).

    Raises:
        ValueError: If there are test samples but no train samples to compare
            them against.
    """
    if samples is None:
        samples = data  # records loaded at module level

    n_test = int(len(samples) * split_ratio)
    test_data = np.array([x["feature"] for x in samples[:n_test]])
    train_data = np.array([x["feature"] for x in samples[n_test:]])

    n_total = test_data.shape[0]
    if n_total == 0:
        # Avoid 0 / 0: an empty test set has no samples above the threshold.
        n_far, ratio = 0, 0.0
    else:
        if train_data.shape[0] == 0:
            raise ValueError(
                "train split is empty; cannot compute nearest-neighbour distances"
            )
        # Broadcasting subtracts the row from every train vector without
        # materialising a tiled copy of the row.
        min_dists = np.array([
            np.min(np.sqrt(np.sum(np.square(row - train_data), axis=1)))
            for row in test_data
        ])
        n_far = int(np.count_nonzero(min_dists > thresh_hold))
        ratio = n_far / n_total

    print('\tthresh:', thresh_hold, '\tratio:', ratio, '(', n_far, '/', n_total, ')')
    return ratio
# Sweep the test fraction from 0.10 to 0.55 in steps of 0.05 and, for each
# split, scan distance thresholds from 0.50 to 1.08 in steps of 0.02.
# NOTE(review): the source's indentation was lost; the final report is assumed
# to run once per split, since the running best is reset for every split.
for split_percent in range(10, 60, 5):
    split = split_percent / 100
    test_share = split*10
    train_share = 10-split*10
    print('\nSplit test/train:', test_share, '/', train_share, '====================================')

    best_thresh, best_ratio = 0, 0
    for thresh_percent in range(50, 110, 2):
        thresh = thresh_percent / 100
        ratio = calculate_ratio(split, thresh)
        # ">=" means that on a tie the later (larger) threshold is kept.
        if ratio >= best_ratio:
            best_thresh, best_ratio = thresh, ratio

    print('\tBEST THRESH: ', best_thresh, 'with ratio ', best_ratio)