Add scripts to generate reference DBs for LSC and SSC

Former-commit-id: 77fd69c9687bfc7cfd3305299c7c81d2a9eddada
Former-commit-id: 4d09933673c30d6a9b2b9cc985b33ef23d7a99f4
This commit is contained in:
2015-10-07 11:32:17 -03:00
parent fb1158bcd4
commit 37e1ecc9fd
8 changed files with 530 additions and 0 deletions

48
detectors/normalize/tools/cc.py Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env python
"""Group the nodes of an edge list into connected components.

Usage: cc.py EDGE_FILE

Every line of EDGE_FILE whose first two whitespace-separated tokens exist is
read as an undirected edge between those two tokens; shorter lines are
skipped and any further tokens on a line are ignored.  For every node one
line "<component index> <node>" is written to standard output.  Component 0
is the largest component, component 1 the next largest, and so on.
"""
import sys


def parse_edges(lines):
    """Yield an (a, b) tuple for each line holding at least two tokens.

    Lines with fewer than two whitespace-separated tokens (blank lines
    included) are silently skipped; tokens after the second are ignored.
    """
    for line in lines:
        parts = line.split()
        if len(parts) >= 2:
            yield parts[0], parts[1]


def connected_components(edges):
    """Return the connected components induced by *edges* as a list of sets.

    The list is ordered by decreasing component size.  Components of equal
    size keep the order in which they were last touched by an edge, most
    recent first (list.sort is stable, also with reverse=True).
    """
    components = []
    for a, b in edges:
        merged = {a, b}
        untouched = []
        # Components are pairwise disjoint, so at most two of them (the one
        # holding a and the one holding b) get folded into the new one.
        for component in components:
            if a in component or b in component:
                merged |= component
            else:
                untouched.append(component)
        # The freshly merged component always goes to the front.
        components = [merged] + untouched
    components.sort(key=len, reverse=True)
    return components


def format_components(components):
    """Yield one "<index> <node>\\n" line per node of every component.

    Nodes are emitted in sorted order so that the output is reproducible;
    plain set iteration order changes from run to run when string hash
    randomisation is active.
    """
    for index, component in enumerate(components):
        for node in sorted(component):
            yield "%d %s\n" % (index, node)


def main(argv=None):
    """Read the edge file named by argv[1] and print its components.

    Returns 0 on success and 2 when the file argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s EDGE_FILE\n" % argv[0])
        return 2
    # The context manager closes the file even if parsing fails midway.
    with open(argv[1]) as data:
        components = connected_components(parse_edges(data))
    sys.stdout.writelines(format_components(components))
    return 0


if __name__ == "__main__":
    sys.exit(main())