Never been to CodeSnippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world (or not, you can keep them private!)

About this user

Paolo

1 total

On This Page:

  1. 1 task_builder

task_builder

""" Functions for creating single tasks from a distance matrix and a cast matrix.

 Cast matrix codes: 0 = not present; 1 = positive train; 2 = negative train; 3 = positive test; 4 = negative test.

        PTr NTr                 PTe NTe
       ---------               ---------
  PTr  | 1 | 2 | PTr       PTr | 1 | 2 | PTr
       ---------               ---------
  NTr  | 3 | 4 | NTr       NTr | 3 | 4 | NTr
       ---------               ---------
        PTr NTr                 PTe NTe        
"""

from numpy import *
from scipy import *

def gettaskID(cm, nfam=0):
    '''Select the row IDs belonging to column (task) ``nfam`` of the cast matrix.

    Every row of ``cm`` is inspected (the row count is no longer hard-coded),
    and its index is collected according to the code found in column ``nfam``.

    Parameters:
        cm:   cast matrix, indexable as ``cm[row][column]``.
        nfam: index of the column (task) to read.

    Returns:
        A tuple ``(ids1, ids2, ids3, ids4)`` of lists holding the row indices
        whose code equals 1, 2, 3 and 4 respectively, in ascending row order.
        Rows with any other code (e.g. 0) are skipped.
    '''
    groups = ([], [], [], [])
    for j, row in enumerate(cm):
        code = row[nfam]
        # Codes 1..4 map onto groups[0]..groups[3]; anything else is ignored.
        for offset, bucket in enumerate(groups):
            if code == offset + 1:
                bucket.append(j)
    return groups

def get_labels_old(pos_threshold = 11, lnum = 74):
    """Return ``lnum`` labels: 1 for indices up to ``pos_threshold``, else 0.

    Parameters:
        pos_threshold: last index (inclusive) that receives label 1.
        lnum:          total number of labels to generate. It was previously
                       ignored in favour of a hard-coded 74, which is still
                       the default.

    Returns:
        A list of ``lnum`` integers (1s followed by 0s).
    """
    return [1 if i <= pos_threshold else 0 for i in range(lnum)]

def get_labels_train(cm, nfam=0):
    """Build the label vector for the training set of task ``nfam``.

    Counts the rows of ``cm`` whose code in column ``nfam`` is 1 (positive
    train) or 2 (negative train) and returns that many 1s followed by that
    many 0s. This matches the row order of ``train_builder``, which stacks
    the positive-train block before the negative-train block.

    Parameters:
        cm:   cast matrix, indexable as ``cm[row][column]``.
        nfam: index of the column (task) to read.

    Returns:
        A list of ``ptrain`` ones followed by ``ntrain`` zeros.
    """
    ptrain = sum(1 for row in cm if row[nfam] == 1)
    ntrain = sum(1 for row in cm if row[nfam] == 2)
    # Exactly ptrain positives: the previous `i <= ptrain` test emitted one
    # extra 1 whenever negative examples were present.
    return [1] * ptrain + [0] * ntrain

def get_labels_test(cm, nfam=0):
    """Build the label vector for the test set of task ``nfam``.

    Counts the rows of ``cm`` whose code in column ``nfam`` is 3 (positive
    test) or 4 (negative test) and returns that many 1s followed by that
    many 0s. This matches the column order of ``test_builder``, which stacks
    the positive-test block before the negative-test block.

    Parameters:
        cm:   cast matrix, indexable as ``cm[row][column]``.
        nfam: index of the column (task) to read.

    Returns:
        A list of ``ptest`` ones followed by ``ntest`` zeros.
    """
    ptest = sum(1 for row in cm if row[nfam] == 3)
    ntest = sum(1 for row in cm if row[nfam] == 4)
    # Exactly ptest positives: the previous `i <= ptest` test emitted one
    # extra 1 whenever negative examples were present.
    return [1] * ptest + [0] * ntest

def train_builder(dm, ids):
    ''' Build the training set from the IDs selected in the cm.

    ``ids[0]`` and ``ids[1]`` are the two ID groups used here. The result is
    the 2x2 block matrix

        [ dm[ids0, ids0]  dm[ids0, ids1] ]
        [ dm[ids1, ids0]  dm[ids1, ids1] ]
    '''
    first, second = ids[0], ids[1]

    def block(rows, cols):
        # Collect dm[r][c] row by row, then give the flat list its 2-D shape.
        cells = []
        for r in rows:
            cells += [dm[r][c] for c in cols]
        return array(cells).reshape((len(rows), len(cols)))

    upper_left = block(first, first)
    upper_right = block(first, second)
    lower_left = block(second, first)
    lower_right = block(second, second)

    # Glue the four blocks together: side by side first, then top over bottom.
    upper = hstack((upper_left, upper_right))
    lower = hstack((lower_left, lower_right))
    return vstack((upper, lower))

def test_builder(dm, ids):
    ''' Creo il test set a partire dagli IDs nella cm '''
    test1 = []
    test2 = []
    test3 = []
    test4 = []
    # test1 ids = (0,2)
    for i in ids[0]:
        for j in ids[2]:
            test1.extend([dm[i][j]])
    atest1 = array(test1)
    atest1 = atest1.reshape((len(ids[0]),len(ids[2])))
    #print atest1.shape
    # test2 ids  = (0,3)
    for i in ids[0]:
        for j in ids[3]:
            test2.extend([dm[i][j]])            
    atest2 = array(test2)
    atest2 = atest2.reshape((len(ids[0]),len(ids[3])))
    #print atest2.shape
    # test3 ids = (1,2)
    for i in ids[1]:
        for j in ids[2]:
            test3.extend([dm[i][j]])
    atest3 = array(test3)
    atest3 = atest3.reshape((len(ids[1]),len(ids[2])))
    #print atest3.shape
    # test4 ids = (1,3)
    for i in ids[1]:
        for j in ids[3]:
            test4.extend([dm[i][j]])
    atest4 = array(test4)
    atest4 = atest4.reshape((len(ids[1]),len(ids[3])))
    #print atest4.shape   
    # Stacking together different arrays
    atest12 = hstack((atest1,atest2))
    #print size(atest12)
    atest34 = hstack((atest3,atest4))
    #print size(atest34)
    atest = vstack((atest12,atest34))
    #print size(atest)
#    print atest12.shape
#    print atest34.shape
#    print atest.shape          
                       
    return atest

# ------------------------------------------------------------------------------
''' reading the files in as arrays - 2 way '''

# Input files: dm_filename is loaded below as the distance matrix and
# cm_filename as the cast matrix (cm).
dm_filename = ('../data/3PGK_DNA_BLAST_nolabels.txt')
cm_filename = ("../data/3PGK_30_nolabels.txt")
# 1. Using SciPy: load the distance matrix with scipy's io.array_import reader.
# NOTE(review): io.array_import looks like a legacy SciPy API - confirm it is
# available in the SciPy version this script is run with.
scipydati = io.array_import.read_array(dm_filename)
# 2. Using array
def read_array(filename):
    """Read a whitespace-separated table of integers from ``filename``.

    Parameters:
        filename: path of the text file to read.

    Returns:
        A list with one entry per line of the file; each entry is the list
        of integers found on that line (an empty list for a blank line).

    Raises:
        ValueError: if a token cannot be parsed as an integer.
    """
    # The context manager closes the file even if int() raises midway.
    with open(filename, 'r') as f:
        # Build real lists: map() would return lazy iterators on Python 3.
        return [[int(token) for token in line.split()] for line in f]
# --------------------------------------------------------------
''' Task Creation for task = NFAM'''

# Index of the cast-matrix column (task) to build the train/test sets for.
NFAM = 0
# Cast matrix, parsed as nested lists of integers.
cm = read_array(cm_filename)
# Row IDs of the task, grouped by cast-matrix code (1, 2, 3, 4).
IDs = gettaskID(cm, nfam = NFAM)
# Distance matrix parsed with the local reader. It is only referenced by the
# commented-out builder calls at the end of this section; the active calls
# below use the SciPy-loaded `scipydati` instead.
dm = read_array(dm_filename)
trainset = train_builder(scipydati, ids=IDs)
testset = test_builder(scipydati, ids=IDs)
# Label vectors for the train and test sets of this task.
trainlabels = get_labels_train(cm, nfam = NFAM)
testlabels = get_labels_test(cm, nfam = NFAM)
# Alternative: build the sets from the locally parsed matrix.
#trainset = train_builder(dm, ids=IDs)
#testset = test_builder(dm, ids=IDs)

# --------------------------------------------------------------
''' write the trainset and the testset to a csv file'''
import csv

# Manual switch: set flag to 1 to write the CSV files. With flag = 0 nothing
# is written and only the message in the else branch is printed.
flag = 0
if (flag==1):    
    # Append the class labels as the last row of each set...
    trainsetplus = vstack((trainset, trainlabels))
    testsetplus = vstack((testset, testlabels))
    # ...then transpose, so the labels end up as the last column of the output.
    trainsetplus_tr = transpose(trainsetplus)
    testsetplus_tr = transpose(testsetplus)
    # Output file names carry the task index NFAM.
    # NOTE(review): the files opened here are never explicitly closed.
    writer = csv.writer(open("trainset_"+str(NFAM)+".csv", "wb"))
    writer.writerows(trainsetplus_tr)
    writer = csv.writer(open("testset_"+str(NFAM)+".csv", "wb"))
    writer.writerows(testsetplus_tr)
# The printed message is Italian for "not saving anything".
else: print"non salvo nulla"

# --------------------------------------------------------------

from svm import *

# Train SVM models on the training set and report the error count per kernel.
# NOTE(review): svm_problem / svm_parameter / svm_model and the kernel
# constants come from `from svm import *` - presumably the libsvm Python
# bindings; confirm against the installed module.
problem = svm_problem(trainlabels,trainset)
#param_linear_10 = svm_parameter(kernel_type = LINEAR, C = 10, svm_type = C_SVC)
param_rbf_10 = svm_parameter(kernel_type = RBF, C = 10, svm_type = C_SVC)
#m_lin = svm_model(prob, param_linear_10)
# Model built with the RBF parameters; it is not used again in this section.
m_rbf = svm_model(problem, param_rbf_10)

#from cross_validation import *
#do_cross_validation(trainset, trainlabels, param_rbf_10, 10)
#do_cross_validation(trainset, trainlabels, param_linear_10, 10)

# Number of training rows.
# NOTE(review): this rebinds the name `size`, which the star imports at the
# top of the file may also provide (the commented-out debug prints use it as
# a function) - confirm nothing later relies on that.
size = len(trainset)
kernels = [LINEAR, POLY, RBF]
kname = ['linear','polynomial','rbf']

# One shared parameter object; only its kernel_type changes per iteration.
param = svm_parameter(C = 10,svm_type = C_SVC)
for k in kernels:
    param.kernel_type = k;
    model = svm_model(problem,param)
    errors = 0
    # Predictions are made on the training rows themselves, so the figure
    # printed below is a training error, not a test error.
    for i in range(size):
        prediction = model.predict(trainset[i])
        # NOTE(review): this binds the method without calling it, and the
        # value is never used.
        probability = model.predict_probability
        if (trainlabels[i] != prediction):
            errors = errors + 1
    print "##########################################"
    # kname is indexed with the kernel constant itself, so this assumes
    # LINEAR, POLY, RBF equal 0, 1, 2 - TODO confirm.
    print " kernel %s: error rate = %d / %d" % (kname[param.kernel_type], errors, size)
    print "##########################################"
1 total

On This Page:

  1. 1 task_builder