| | import numpy as np |
| | import csv |
| |
|
def load_GO_annot(filename):
    """Load per-protein GO annotations from a tab-separated file.

    The file is consumed in this order:

    * for each ontology in ``['mf', 'bp', 'cc']``: one header line (skipped),
      one row of GO terms, one header line (skipped), one row of GO names;
    * one column-header line (skipped);
    * one row per protein: the protein id followed by three fields, one per
      ontology, each a comma-separated (possibly empty) list of GO terms.

    Blank lines among the protein rows are ignored.

    Args:
        filename: Path of the annotation file.

    Returns:
        A tuple ``(prot2annot, goterms, gonames, counts)``:

        * ``prot2annot`` maps protein id -> ``{ont: float array}`` where the
          array has one slot per GO term of that ontology, set to 1.0 for
          every term listed for the protein;
        * ``goterms`` maps ontology -> list of GO terms;
        * ``gonames`` maps ontology -> list of GO names;
        * ``counts`` maps ontology -> float array holding, per GO term, the
          number of protein rows that list it.

    Raises:
        ValueError: If a protein row lists a GO term that is absent from the
            term row of the corresponding ontology.
        IndexError: If a non-blank protein row has fewer than three
            annotation fields.
        StopIteration: If the file ends before a term/name row is read.
    """
    onts = ['mf', 'bp', 'cc']
    prot2annot = {}
    goterms = {}
    gonames = {}
    # newline='' is what the csv module asks for so it can handle line
    # endings itself.
    with open(filename, mode='r', newline='') as tsvfile:
        reader = csv.reader(tsvfile, delimiter='\t')

        # Each ontology contributes: header, terms row, header, names row.
        for ont in onts:
            next(reader, None)  # skip the terms header
            goterms[ont] = next(reader)
            next(reader, None)  # skip the names header
            gonames[ont] = next(reader)

        next(reader, None)  # skip the column header of the protein table

        # term -> position tables, built once so each lookup is O(1).
        # setdefault keeps the first position of a repeated term, matching
        # what list.index would return.
        term_index = {}
        for ont in onts:
            lookup = {}
            for idx, term in enumerate(goterms[ont]):
                lookup.setdefault(term, idx)
            term_index[ont] = lookup

        counts = {ont: np.zeros(len(goterms[ont]), dtype=float) for ont in onts}
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line (e.g. at end of file).
                continue
            prot, prot_goterms = row[0], row[1:]
            annot = {}
            for i, ont in enumerate(onts):
                lookup = term_index[ont]
                try:
                    goterm_indices = [
                        lookup[goterm]
                        for goterm in prot_goterms[i].split(',')
                        if goterm != ''
                    ]
                except KeyError as err:
                    # Keep the exception type list.index would have raised.
                    raise ValueError(
                        "unknown {} GO term {!r} for protein {!r}".format(
                            ont, err.args[0], prot
                        )
                    ) from None
                annot[ont] = np.zeros(len(goterms[ont]))
                annot[ont][goterm_indices] = 1.0
                counts[ont][goterm_indices] += 1.0
            prot2annot[prot] = annot
    return prot2annot, goterms, gonames, counts
| |
|
| |
|
def load_EC_annot(filename):
    """Load per-protein EC-number annotations from a tab-separated file.

    The file is consumed in this order: one header line (skipped), one row
    of EC numbers, one column-header line (skipped), then one row per protein
    holding the protein id followed by a comma-separated list of EC numbers.
    Blank lines among the protein rows are ignored, and empty tokens in the
    EC list are skipped (as the GO loader does for GO terms).

    Args:
        filename: Path of the annotation file.

    Returns:
        A tuple ``(prot2annot, ec_numbers, ec_numbers, counts)``. The EC
        file has a single label row, so the same ``{'ec': [...]}`` dict is
        returned in both the "terms" and "names" positions, giving the same
        4-tuple shape as ``load_GO_annot``:

        * ``prot2annot`` maps protein id -> ``{'ec': int64 array}`` with a 1
          in the slot of every EC number listed for the protein;
        * ``ec_numbers`` is ``{'ec': list of EC numbers}``;
        * ``counts`` is ``{'ec': float array}`` holding, per EC number, the
          number of protein rows that list it.

    Raises:
        ValueError: If a protein row lists an EC number that is absent from
            the EC-number row.
        IndexError: If a non-blank protein row has no EC field.
        StopIteration: If the file ends before the EC-number row is read.
    """
    prot2annot = {}
    # newline='' is what the csv module asks for so it can handle line
    # endings itself.
    with open(filename, mode='r', newline='') as tsvfile:
        reader = csv.reader(tsvfile, delimiter='\t')

        next(reader, None)  # skip the EC-number header
        ec_numbers = {'ec': next(reader)}
        next(reader, None)  # skip the column header of the protein table

        # EC number -> position, built once so each lookup is O(1).
        # setdefault keeps the first position of a repeated entry, matching
        # what list.index would return.
        ec_index = {}
        for idx, ec_num in enumerate(ec_numbers['ec']):
            ec_index.setdefault(ec_num, idx)

        n_ec = len(ec_numbers['ec'])
        counts = {'ec': np.zeros(n_ec, dtype=float)}
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line (e.g. at end of file).
                continue
            prot, prot_ec_numbers = row[0], row[1]
            try:
                ec_indices = [
                    ec_index[ec_num]
                    for ec_num in prot_ec_numbers.split(',')
                    if ec_num != ''
                ]
            except KeyError as err:
                # Keep the exception type list.index would have raised.
                raise ValueError(
                    "unknown EC number {!r} for protein {!r}".format(
                        err.args[0], prot
                    )
                ) from None
            annot = np.zeros(n_ec, dtype=np.int64)
            annot[ec_indices] = 1
            prot2annot[prot] = {'ec': annot}
            counts['ec'][ec_indices] += 1
    # Without this return the loaded data was discarded (the function
    # evaluated to None).
    return prot2annot, ec_numbers, ec_numbers, counts
| |
|