First commit - second part

Former-commit-id: 202296404e6a70f8ae96db99faffb456104c57e9
Former-commit-id: 118417735d2055683607df9809c9b721cc1b1bab
This commit is contained in:
2015-10-02 21:12:35 +02:00
parent f44f0d8179
commit d298385685
316 changed files with 122579 additions and 0 deletions

BIN
src/sequtils/.DS_Store vendored Normal file

Binary file not shown.

33
src/sequtils/Makefile Executable file
View File

@ -0,0 +1,33 @@
# ---------------------------------------------------------------
# $Id: $
# ---------------------------------------------------------------
# @file: Makefile
# @desc: makefile for lxpack
#
# @history:
# @history:
# @+ <Gloup> : Apr 97 : Created
# @+ <Gloup> : Mar 02 : Updated for LXxware
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
# configuration (PORTNAME, PRTDIR, BINDIR, ... are used below)
include ../../config/auto.conf
# sub-directories handled through the _action target of propagate.targ
DIRS = lxpack
include ../../config/targets/propagate.targ
include ../../config/targets/help.targ
# all: run the 'all' action through _action, then copy every binary found in
# lxpack's own port directory into $(BINDIR).
# Plain mkdir (no -p) is used, so $(PRTDIR) is created before $(BINDIR).
all::
$(MAKE) ACTION=$@ _action
test -d $(PRTDIR) || mkdir $(PRTDIR)
test -d $(BINDIR) || mkdir $(BINDIR)
\cp -f lxpack/ports/$(PORTNAME)/bin/* $(BINDIR)
# clean: at this level, runs the 'portclean' target of the lxpack sub-directory
clean::
$(MAKE) -C lxpack portclean

BIN
src/sequtils/lxpack/.DS_Store vendored Normal file

Binary file not shown.

28
src/sequtils/lxpack/Makefile Executable file
View File

@ -0,0 +1,28 @@
# ---------------------------------------------------------------
# $Id: $
# ---------------------------------------------------------------
# @file: Makefile
# @desc: makefile for sequtils
#
# @history:
# @history:
# @+ <Gloup> : Apr 97 : Created
# @+ <Gloup> : Mar 02 : Updated for LXxware
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
# configuration local to this package (./config)
include ./config/auto.conf
# sub-directories handled through the _action target of propagate.targ
DIRS = src
include ./config/targets/propagate.targ
include ./config/targets/help.targ
# portclean: run 'portclean' through _action, then remove the whole
# port directory $(PRTDIR) if it exists
portclean::
$(MAKE) ACTION=$@ _action
(! test -d $(PRTDIR)) || \rm -r $(PRTDIR)

View File

@ -0,0 +1,51 @@
$Id: README.txt 1825 2013-02-26 09:39:47Z viari $
This directory contains Makefile machine specific configuration files
(and default targets to help you write Makefiles)
These headers should be used with GNU make or compatible
#
# portname
#
To check your port, issue :
./guess_port
if output is 'unknown <mach>:<sys>:<rel>' then you should :
- add a port entry in guess_port for <mach>:<sys>:<rel>
- create a ports/<port>.conf configuration file
(the best is to start from another port file,
choose whatever looks closest)
#
# configuration flags
#
auto.conf : the main configuration file :
- determine the machine port thru 'guess_port' shell
- include 'default.conf' file
- include the machine specific 'ports/<port>.conf' file
default.conf : default configuration (included by 'auto.conf')
ports/<port>.conf : machine specific configuration (included by 'auto.conf')
#
# utility targets
#
targets/help.targ : target for standard help
targets/propagate.targ : target for propagating targets to subdirectories
targets/package.targ : default targets for standard package with 'configure'
targets/empty.targ : default empty targets (defined as double colon rules)
targets/lxbin.targ : default make targets for standard lx binary (without libraries)
targets/debug.targ : target to print debug information (for dev.)

View File

@ -0,0 +1,54 @@
#
# $Id: auto.conf 1825 2013-02-26 09:39:47Z viari $
#
# auto.conf
# auto configuration file using guess_port
#
# this file is included in Makefile
#
#
# default shell for gnu-make
#
SHELL = /bin/sh
#
# CFGDIR : location of config files = this file directory location
#
# CFGPRT : port name (as returned by guess_port)
#
# because builtin 'lastword' is missing in gnu-make 3.80
# (user-defined replacement, used with $(call): last word of the list given as $1)
lastword = $(word $(words $1), $1)
# the last word of MAKEFILE_LIST is taken as the path of this file, so CFGDIR
# is computed here, before any of the 'include' directives below
CFGDIR := $(dir $(call lastword, $(MAKEFILE_LIST)))
CFGPRT := $(shell $(CFGDIR)guess_port)
# check if port is correctly defined
# guess_port prints one word on success and 'unknown <mach>:<sys>:<rel>'
# (two words) on failure; anything but exactly one word stops make here
ifneq (1, $(words $(CFGPRT)))
entry := $(call lastword, $(CFGPRT))
$(error port is undefined - add entry for "$(entry)" in configuration file -)
endif
#
# PORTNAME : port name to use : default is CFGPRT but may be further modified
# by machine specific configuration
PORTNAME = $(CFGPRT)
#
# default configuration
# may be overridden by machine dependent definitions below
#
include $(CFGDIR)default.conf
#
# machine dependent definitions
#
include $(CFGDIR)ports/$(CFGPRT).conf

View File

@ -0,0 +1,124 @@
#
# $Id: default.conf 2007 2013-12-03 14:21:39Z viari $
#
# default.conf
# default configuration flags
# maybe further redefined by machine specific configuration
#
# this file is included by auto.conf
#
# ------------------------------------
# General compilation flags
# ------------------------------------
#
# MACHDEF : define machine and OS specific flags
#
MACHDEF =
#
# CC : (ansi C) compiler command to use
# you may add some machine specific flags (like -arch ...)
# in the <machine>.conf configuration file
#
CC = gcc
#
# default compiler optimizer flag
#
OPTIM = -O
#
# CC_LIBS : additional machine specific $(CC) libraries
# like '-lC' on some machines
#
CC_LIBS =
#
# MALLOC_LIBS : machine specific malloc libraries
# like '-lmalloc' on SGI
#
MALLOC_LIBS =
#
# MATH_LIBS : machine specific math libraries
# like '-lm' on Solaris
#
MATH_LIBS =
#
# LINT : looks like LINT command does not exist anymore
# here is a rough replacement
#
LINT = gcc -S -Wall -Wno-format-y2k -W -Wstrict-prototypes \
-Wmissing-prototypes -Wpointer-arith -Wreturn-type \
-Wcast-qual -Wwrite-strings -Wswitch -Wshadow \
-Wcast-align -Wbad-function-cast -Wchar-subscripts \
-Winline -Wnested-externs -Wredundant-decls
# ------------------------------------
# General system commands
# ------------------------------------
#
# AR : AR archive command
# ARFLAGS : $(AR) archiving flags
# ARXFLAGS : $(AR) extraction flags
#
AR = ar
ARFLAGS = rcv
ARXFLAGS = xv
#
# RANLIB : ranlib command
#
RANLIB = ranlib
#
# DIFF : diff command
#
DIFF = diff
#
# TAR : tar command
#
TAR = tar
# ------------------------------------
# Default locations
# ------------------------------------
#
# PRTDIR : port dependent files location (libraries and binaries)
# BINDIR : port binaries
# LIBDIR : port libraries
#
PRTDIR = $(CFGDIR)../ports/$(PORTNAME)
BINDIR = $(PRTDIR)/bin
LIBDIR = $(PRTDIR)/lib
# ------------------------------------
# default gmake variable in implicit rules
# ------------------------------------
# All four are recursively expanded ('='), so the variables they mention are
# looked up when the rule runs, not when this file is read.
# INCDIR and LIBS are not defined in this file: INCDIR is assigned by
# targets/lxbin.targ and targets/lxlib.targ, and LIBS is left to the Makefile
# that needs extra libraries (empty otherwise).
CFLAGS = $(OPTIM) $(MACHDEF) -I$(INCDIR)
LDFLAGS = -L$(LIBDIR) -L.
LDLIBS = $(LIBS) $(MALLOC_LIBS) $(MATH_LIBS) $(CC_LIBS)
LINTFLAGS = $(MACHDEF) -I$(INCDIR)

View File

@ -0,0 +1,33 @@
#! /bin/sh
#
# $Id: guess_port 1825 2013-02-26 09:39:47Z viari $
#
# @file: guess_port
# @desc: attempt to guess the portname
# @usage: guess_port
#
# @history:
# @+ <Gloup> Nov. 2000 first draft adapted from GNU config.guess
# @+ <Gloup> Feb. 2010 moved to sh
#
# machine, system and release, as reported by uname
mach=`uname -m`
syst=`uname -s`
rels=`uname -r`

# Map <mach>:<syst>:<rels> to a port name.
# The first matching pattern wins, so the SunOS 5.* entries must stay ahead of
# the generic SunOS one.
# On success exactly one word is printed (auto.conf relies on this);
# otherwise 'unknown <mach>:<syst>:<rels>' is printed and the exit status is 1.
case ${mach}:${syst}:${rels} in
  alpha:OSF1:* )      echo alpha-osf1;;
  sun4*:SunOS:5.* )   echo sparc-solaris;;
  i86pc:SunOS:5.* )   echo i386-solaris;;
  sun4*:SunOS:* )     echo sparc-sunos;;
  Power*:Darwin:* )   echo ppc-darwin;;
  i*86:Linux:* )      echo i386-linux;;
  x*86*:Linux:* )     echo i386-linux;;
  i*86:Darwin:* )     echo i386-darwin;;
  # 64-bit Intel Macs report x86_64; handle them like x86_64 Linux above
  x*86*:Darwin:* )    echo i386-darwin;;
  IP*:IRIX*:* )       echo mips-irix;;
  i*86:MINGW32*:* )   echo x86-mingw32;;
  *)                  echo unknown ${mach}:${syst}:${rels}; exit 1;;
esac

exit 0

View File

@ -0,0 +1,26 @@
#
# $Id: i386-darwin.conf 1825 2013-02-26 09:39:47Z viari $
#
# i386-darwin.conf
# configuration file for MacOS-X/Intel-Based/Darwin 1.2 with gcc compiler
# this file is included in Makefile
#
# system (uname -srp) : Darwin 8.7.1 i386
# compiler (cc --version) : i686-apple-darwin8-gcc-4.0.1
#
# check tags
# @uname:uname -srp:Darwin 8.7.1 i386
# @cc:cc --version:i686-apple-darwin8-gcc-4.0.1
#
#
# ------------------------------------
# General compilation flags
# ------------------------------------
#
# MACHDEF : define machine and OS specific flags
#
MACHDEF = -DLX_TARGET_MACINTEL -DLITTLE_ENDIAN -DMACOSX

View File

@ -0,0 +1,32 @@
#
# $Id: i386-linux.conf 1825 2013-02-26 09:39:47Z viari $
#
# i386-linux.conf
# configuration file for linux ix86 with GNU gcc compiler
# this file is included in Makefile
#
# system (uname -srp) : Linux 2.2.14-5.0 unknown
# compiler (gcc --version) : egcs-2.91.66
#
# check tags
# @uname:uname -srp:Linux 2.2.14-5.0 unknown
# @cc:cc --version:egcs-2.91.66
#
#
# ------------------------------------
# General compilation flags
# ------------------------------------
#
# MACHDEF : define machine and OS specific flags
#
MACHDEF = -DLX_TARGET_LINUX -DLITTLE_ENDIAN
#
# MATH_LIBS : machine specific math libraries
#
MATH_LIBS = -lm

View File

@ -0,0 +1,32 @@
#
# $Id: ppc-darwin.conf 1825 2013-02-26 09:39:47Z viari $
#
# ppc-darwin.conf
# configuration file for MacOS-X/Darwin 1.2 with native cc compiler
# this file is included in Makefile
#
# system (uname -srp) : Darwin 1.2 powerpc
# compiler (cc --version) : 2.7.2.1
#
# check tags
# @uname:uname -srp:Darwin 1.2 powerpc
# @cc:cc --version:2.7.2.1
#
#
# ------------------------------------
# General compilation flags
# ------------------------------------
#
# MACHDEF : define machine and OS specific flags
#
MACHDEF = -DLX_TARGET_MACPPC -DBIG_ENDIAN
#
# CC : name of (ansi C) compiler to use
#
CC = cc -arch ppc

View File

@ -0,0 +1,31 @@
#
# $Id: sparc-solaris.conf 1825 2013-02-26 09:39:47Z viari $
#
# sparc-solaris.conf
# configuration file for sparc solaris with GNU gcc compiler
# this file is included in Makefile
#
# system (uname -srp) : SunOS 5.8 sparc
# compiler (gcc --version) : 2.95.2
#
# check tags
# @uname:uname -srp:SunOS 5.8 sparc
# @cc:cc --version:2.95.2
#
#
# ------------------------------------
# General compilation flags
# ------------------------------------
#
# MACHDEF : define machine and OS specific flags
#
MACHDEF = -DLX_TARGET_SOLARIS -DBIG_ENDIAN
#
# MATH_LIBS : machine specific math libraries
#
MATH_LIBS = -lm

View File

@ -0,0 +1,54 @@
#
# $Id: x86-mingw32.conf 1825 2013-02-26 09:39:47Z viari $
#
# x86-mingw32
# configuration file for MinGW with GNU gcc compiler.
#
# this file is included in Makefile
#
#
#
# rename PORTNAME safely since MinGW produce pure win32 executables
# without dll's
#
PORTNAME = x86-win32
# ------------------------------------
# General compilation flags
# ------------------------------------
#
# CC_LIBS : additional machine specific $(CC) libraries
#
# libiberty is needed for some system extensions (like mkstemps)
#
CC_LIBS = -liberty
#
# MACHDEF : define machine and OS specific flags
#
# -DDLMALLOC : use dlmalloc instead of malloc (which does not have mallinfo)
# -posix is a new replacement for several MinGW32 flags, including:
# -D__USE_MINGW_ANSI_STDIO : mingw gcc flag to recognize the C99 "%zu" format
#
MACHDEF = -posix -DLX_TARGET_WIN32 -DWIN_MINGW -DDLMALLOC -DLITTLE_ENDIAN
#
# MATH_LIBS : machine specific math libraries
#
MATH_LIBS = -lm
# ------------------------------------
# General system commands
# ------------------------------------
#
# DIFF : diff command / should ignore cr on windows
#
DIFF = diff --strip-trailing-cr

View File

@ -0,0 +1,25 @@
#
# $Id: help.targ 1825 2013-02-26 09:39:47Z viari $
#
# debug.targ
#
# target to print debug information (dev. only)
#
# it defines the following targets:
#
# debug :
# print debug
#
# it requires auto.conf
#
# debug: print the port-related configuration variables, one per line.
# The leading '@' keeps make from echoing the echo commands themselves.
.PHONY: debug
debug::
@echo "+ PORTNAME: $(PORTNAME)"
@echo "+ CFGPRT: $(CFGPRT)"
@echo "+ CFGDIR: $(CFGDIR)"
@echo "+ PRTDIR: $(PRTDIR)"
@echo "+ MACHDEF: $(MACHDEF)"

View File

@ -0,0 +1,24 @@
#
# $Id: $
#
# empty.targ
#
# default empty targets (defined as double colon rules)
#
#
#
# Rules
#
# Every target is declared as a double-colon rule without recipe, so other
# files can add their own double-colon rules for the same target names.
# NOTE(review): 'test::' appears twice below while 'help' is listed in .PHONY
# but has no rule here - the second 'test::' may have been meant as 'help::';
# confirm before changing.
.PHONY: all test clean portclean help
all::
test::
clean::
portclean:: clean
test::

View File

@ -0,0 +1,23 @@
#
# $Id: help.targ 1825 2013-02-26 09:39:47Z viari $
#
# help.targ
#
# default target to print help
#
# it defines the following targets:
#
# help :
# print help
#
# help: print the list of standard actions.
# Double-colon rule, so other files may add their own 'help::' rules;
# the leading '@' keeps make from echoing the echo commands themselves.
.PHONY: help
help::
@ echo "basic usage: make [<action>+]"
@ echo "valid <action> :"
@ echo " all : compile everything for current port [default target]"
@ echo " clean : local cleanup"
@ echo " portclean : cleanup distribution for current port"
@ echo " test : run tests on current port"
@ echo " help : print this help"

View File

@ -0,0 +1,51 @@
#
# $Id: $
#
# lxbin.targ
#
# default make targets for standard lx binary
#
# you should define the 'PROGS' and 'OSRC' variables
# and optionally 'LIBS' if binaries have to be linked with libraries
#
# note: if main source code for binary PROG is PROG.c, there is nothing to do,
# else (e.g. if it involves several sources files) you should also add local
# file dependencies. e.g under the form:
#
# mymain: $(OBJ) mymain_base.c mymain_help.c
# $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS)
#
#
# 'auto.conf' should have been included
#
# object list derived from the C sources named in OSRC
OBJ = $(OSRC:.c=.o)
# include directory used by CFLAGS / LINTFLAGS (-I$(INCDIR))
INCDIR = ../include
#
# Rules
#
.PHONY: all prelib install test clean portclean
# all: create the port directories, build every program, install them
all:: prelib $(PROGS) install
@echo "+++++++++++ binaries $(PROGS) done"
# prelib: make sure $(LIBDIR) exists before anything is linked
# (plain mkdir, so $(PRTDIR) is created first)
prelib::
test -d $(PRTDIR) || mkdir $(PRTDIR) # because some linker may complain
test -d $(LIBDIR) || mkdir $(LIBDIR) # if -L$(LIBDIR) does not exist
# install: copy each program to $(BINDIR); the leading '-' makes make
# carry on even if the copy loop fails
install::
test -d $(PRTDIR) || mkdir $(PRTDIR)
test -d $(BINDIR) || mkdir $(BINDIR)
-for f in $(PROGS) ; do \cp -f $$f $(BINDIR) ; done
# test: nothing by default (double-colon rule that other files may extend)
test::
# clean: remove objects, backup files and the locally built programs
clean::
-\rm -f *.o cvstatic* *% *.bak so_loc*
-\rm -f $(PROGS)
# portclean: clean, then also remove the installed copies from $(BINDIR)
portclean:: clean
-(! test -d $(BINDIR)) || (cd $(BINDIR) && \rm -f $(PROGS))

View File

@ -0,0 +1,43 @@
#
# $Id: $
#
# lxlib.targ
#
# default make targets for standard lx library
#
# you should define the 'LOCLIB' and 'OSRC' variables
#
# 'auto.conf' should have been included
#
# object list derived from the C sources named in OSRC
OBJ = $(OSRC:.c=.o)
# include directory used by CFLAGS / LINTFLAGS (-I$(INCDIR))
INCDIR = ../include
#
# Rules
#
.PHONY: all lib install test clean portclean
# all: build the archive, then install it
all:: lib install
@echo "+++++++++++ library $(LOCLIB) done"
# lib: archive the objects into $(LOCLIB) and run ranlib on it
lib:: $(OBJ)
$(AR) $(ARFLAGS) $(LOCLIB) $(OBJ)
$(RANLIB) $(LOCLIB)
# install: copy the archive to $(LIBDIR) and run ranlib again on the copy
# (plain mkdir, so $(PRTDIR) is created first)
install::
test -d $(PRTDIR) || mkdir $(PRTDIR)
test -d $(LIBDIR) || mkdir $(LIBDIR)
\cp -f $(LOCLIB) $(LIBDIR)
$(RANLIB) $(LIBDIR)/$(LOCLIB)
# test: nothing by default (double-colon rule that other files may extend)
test::
# clean: remove objects, backup files and the locally built archive;
# the leading '-' makes make ignore failures of these commands
clean::
-\rm -f *.o cvstatic* *% *.bak so_loc*
-\rm -f $(LOCLIB)
# portclean: clean, then also remove the installed archive from $(LIBDIR)
portclean:: clean
-(! test -d $(LIBDIR)) || (cd $(LIBDIR) && \rm -f $(LOCLIB))

View File

@ -0,0 +1,48 @@
#
# $Id: package.targ 1825 2013-02-26 09:39:47Z viari $
#
# package.targ
#
# default make targets for standard package with configure
#
# you should define the 'PKG' variable
# (and optionally 'PKGTAR', 'PKGDIR')
#
# '?=' only assigns when the including Makefile has not set the variable
PKGTAR ?= $(PKG).tgz
PKGDIR ?= build.$(PORTNAME)
# absolute install prefix: ./configure is run from inside $(PKGDIR), so a
# relative $(PRTDIR) would not point to the same place there
# NOTE(review): 'abspath' is a GNU make 3.81 function, while auto.conf works
# around a builtin missing in 3.80 - confirm the minimum make version intended
PRTPATH = $(abspath $(PRTDIR))
#
# Rules
#
.PHONY: all clean test portclean pkg pkg.expand pkg.make pkg.install
all:: pkg
# pkg.expand: unpack $(PKGTAR) into $(PKGDIR) unless a configure script is
# already there; the archive's top-level directory is stripped
pkg.expand::
test -d $(PKGDIR) || mkdir $(PKGDIR)
test -f $(PKGDIR)/configure || $(TAR) zxf $(PKGTAR) -C $(PKGDIR) --strip-components 1
# pkg.make: run configure once (skipped when a Makefile exists), then build
pkg.make:: pkg.expand
test -f $(PKGDIR)/Makefile || (cd $(PKGDIR) && ./configure --prefix=$(PRTPATH))
$(MAKE) -C $(PKGDIR)
# pkg.install: run the package's own 'install' target
pkg.install:: pkg.make
$(MAKE) -C $(PKGDIR) install
pkg:: pkg.install
@echo "+++++++++++ package $(PKG) done"
# test / clean / portclean: forwarded to the package only when $(PKGDIR)
# exists; portclean additionally removes $(PKGDIR) itself
test::
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) test
clean::
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) clean
portclean::
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) distclean
(! test -d $(PKGDIR)) || \rm -r $(PKGDIR)

View File

@ -0,0 +1,30 @@
#
# $Id: propagate.targ 1825 2013-02-26 09:39:47Z viari $
#
# propagate.targ
#
# default make targets for library containers
#
# you should define the 'DIRS' variable
#
# It will propagate 'MAKE <target>' to all
# directories listed in DIRS
#
#
# Rules
#
# the directories are declared phony so that their rule below always runs
.PHONY: all _action $(DIRS)
# .DEFAULT is used by make for any target that has no rule of its own:
# the target name is passed down as ACTION
.DEFAULT:
$(MAKE) ACTION=$@ _action
all::
$(MAKE) ACTION=all _action
# _action: visit every directory of DIRS, then report
_action: $(DIRS)
@echo "$(ACTION) done"
# one sub-make per directory, asked to build the target named by ACTION
$(DIRS):
$(MAKE) -C $@ $(ACTION)

View File

@ -0,0 +1,184 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: Genetic.h */
/* @desc: Genetic codes / include file */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : first draft for PWG from acnuc */
/* ---------------------------------------------------------------- */
#ifndef _H_Genetic
#define _H_Genetic
/* ==================================================== */
/* Constants */
/* ==================================================== */
#define GEN_NUC_ALPHA "AaCcGgTtUu"
#define GEN_PRO_ALPHA "RLSTPAGVKNQHEDYCFIMW*X"
#define GEN_MAX_CODES 9
#define GEN_CODE_UNIVL 0 /* Universal */
#define GEN_CODE_MYEAS 1 /* mito yeast */
#define GEN_CODE_MVERT 2 /* mito vertebrate */
#define GEN_CODE_FUNGI 3 /* filamentous fungi */
#define GEN_CODE_MINSE 4 /* mito insects & platyhelminthes */
#define GEN_CODE_CANDI 5 /* Candida cylindracea */
#define GEN_CODE_CILIA 6 /* Ciliata */
#define GEN_CODE_EUPLO 7 /* Euplotes */
#define GEN_CODE_MECHI 8 /* mito echinoderms */
/* ==================================================== */
/* Data Structures */
/* ==================================================== */
/* One genetic code: a title, a free-text note and its translation table. */
typedef struct GeneticCode {
char title[256]; /* name of the code */
char info[256]; /* information (free text) */
int code[65]; /* 64 codons + Error */
} GeneticCode;
/* ==================================================== */
/* Data Instanciation */
/* ==================================================== */
/*
 * theGeneticCode is defined (with the initializer below) only when
 * GENETIC_CODE_INSTANCE is defined before this header is included;
 * otherwise it is only declared extern.
 * The table has GEN_MAX_CODES entries, in the order of the GEN_CODE_xxx
 * constants.
 *
 * Layout of each code[] array, as read off the entries and their info
 * strings (e.g. "AUA=M" changes entry 12, "UGA=W" changes entry 56):
 *   - entry index = 16*b1 + 4*b2 + b3 with A=0, C=1, G=2, T/U=3
 *   - entry value = position of the amino acid in GEN_PRO_ALPHA
 *   - entry 64 is the Error slot and always holds 21 ('X')
 * NOTE(review): confirm this layout against the code that reads the table.
 */
#ifdef GENETIC_CODE_INSTANCE
GeneticCode theGeneticCode[GEN_MAX_CODES] = { /* Begin of codes */
{ /* 0: UNIVERSAL */
{"Universal"},
{""},
{ 8, 9, 8, 9, 3, 3, 3, 3,
0, 2, 0, 2, 17, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 1, 1, 1, 1,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
20, 14, 20, 14, 2, 2, 2, 2,
20, 15, 19, 15, 1, 16, 1, 16,
21}
},
{ /* 1: MITOCHONDRIAL YEAST */
{"Mitochondrial Yeast"},
{"CUN=T AUA=M UGA=W"},
{ 8, 9, 8, 9, 3, 3, 3, 3,
0, 2, 0, 2, 18, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 3, 3, 3, 3,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
20, 14, 20, 14, 2, 2, 2, 2,
19, 15, 19, 15, 1, 16, 1, 16,
21}
},
{ /* 2: MITOCHONDRIAL VERTEBRATES */
{"Mitochondrial Vertebrates"},
{"AGR=* AUA=M UGA=W"},
{ 8, 9, 8, 9, 3, 3, 3, 3,
20, 2, 20, 2, 18, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 1, 1, 1, 1,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
20, 14, 20, 14, 2, 2, 2, 2,
19, 15, 19, 15, 1, 16, 1, 16,
21}
},
{ /* 3: MITOCHONDRIAL FILAMENTOUS FUNGI */
{"Mitochondrial Filamentous Fungi"},
{"UGA=W"},
{ 8, 9, 8, 9, 3, 3, 3, 3,
0, 2, 0, 2, 17, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 1, 1, 1, 1,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
20, 14, 20, 14, 2, 2, 2, 2,
19, 15, 19, 15, 1, 16, 1, 16,
21}
},
{ /* 4: MITOCHONDRIAL CODE OF INSECT AND PLATYHELMINTHES */
{"Mitochondrial Insects and Platyhelminthes"},
{"AUA=M UGA=W AGR=S"},
{ 8, 9, 8, 9, 3, 3, 3, 3,
2, 2, 2, 2, 18, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 1, 1, 1, 1,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
20, 14, 20, 14, 2, 2, 2, 2,
19, 15, 19, 15, 1, 16, 1, 16,
21}
},
{ /* 5: CANDIDA CYLINDRACEA (see nature 341:164) */
{"Candida cylindracea"},
{"CUG=S CUA=?"},
{ 8, 9, 8, 9, 3, 3, 3, 3,
0, 2, 0, 2, 17, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 21, 1, 2, 1,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
20, 14, 20, 14, 2, 2, 2, 2,
20, 15, 19, 15, 1, 16, 1, 16,
21}
},
{ /* 6: CILIATA */
{"Ciliata"},
{"UAR=Q"},
{ 8, 9, 8, 9, 3, 3, 3, 3,
0, 2, 0, 2, 17, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 1, 1, 1, 1,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
10, 14, 10, 14, 2, 2, 2, 2,
20, 15, 19, 15, 1, 16, 1, 16,
21}
},
{ /* 7: EUPLOTES */
{"Euplotes"},
{"UGA=C"},
{ 8, 9, 8, 9, 3, 3, 3, 3,
0, 2, 0, 2, 17, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 1, 1, 1, 1,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
20, 14, 20, 14, 2, 2, 2, 2,
15, 15, 19, 15, 1, 16, 1, 16,
21}
},
{ /* 8: MITOCHONDRIAL ECHINODERMS */
{"Mitochondrial Echinoderms"},
{"UGA=W AGR=S AAA=N"},
{ 9, 9, 8, 9, 3, 3, 3, 3,
2, 2, 2, 2, 17, 17, 18, 17,
10, 11, 10, 11, 4, 4, 4, 4,
0, 0, 0, 0, 1, 1, 1, 1,
12, 13, 12, 13, 5, 5, 5, 5,
6, 6, 6, 6, 7, 7, 7, 7,
20, 14, 20, 14, 2, 2, 2, 2,
19, 15, 19, 15, 1, 16, 1, 16,
21}
}
/* end of codes */ };
#else
extern GeneticCode theGeneticCode[GEN_MAX_CODES];
#endif
#endif

View File

@ -0,0 +1,74 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: libaabi.h */
/* @desc: Abi general purpose library / include file */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : first draft for PWG */
/* ---------------------------------------------------------------- */
#ifndef _H_libaabi
#define _H_libaabi
/* ==================================================== */
/* Constants */
/* ==================================================== */
#ifndef __FILE__
#define __FILE__ "unknown file"
#endif
#ifndef __LINE__
#define __LINE__ 0
#endif
#define Vrai 1
#define Faux 0
#define TIME_NO_RESET 0
#define TIME_RESET 1
#define WARNING_LEVEL 1
#define FATAL_LEVEL 10
#define NO_ABORT_LEVEL 255
/* ==================================================== */
/* Macros standards */
/* ==================================================== */
/* Allocation helpers. They expand to malloc/realloc/free, so the including */
/* file must provide <stdlib.h>. They are skipped when NEW is already */
/* defined (e.g. by another header included earlier). */
#ifndef NEW
#define NEW(typ) (typ*)malloc(sizeof(typ))
#define NEWN(typ, dim) (typ*)malloc((size_t)(dim) * sizeof(typ))
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (size_t)(dim) * sizeof(typ))
#define FREE(ptr) free(ptr)
#endif
/* Error reporting shortcuts: Error() passes the call site (__FILE__, */
/* __LINE__) to abi_error(); W_Error / F_Error fix the level to */
/* WARNING_LEVEL / FATAL_LEVEL; MemoryError / IOError fix the message too. */
#define Error(message, level) abi_error(__FILE__, __LINE__, message, level)
#define W_Error(message) Error(message, WARNING_LEVEL)
#define F_Error(message) Error(message, FATAL_LEVEL)
#define MemoryError() F_Error("Not enough memory")
#define IOError() F_Error("IO Error")
/* ==================================================== */
/* Prototypes of library functions */
/* ==================================================== */
/* libaabi.c */

/* error notification */
int abi_error (char *filename, int lineno, char *msg, int level);
int abi_last_error (void);
void abi_clear_error (void);
void abi_set_abort_level (int level);
void abi_play_rotator (void);

/* cpu time, in seconds; a non-zero 'reset' restarts the measure */
double abi_user_cpu_time (int reset);
double abi_sys_cpu_time (int reset);
char *abi_str_cpu_time (int reset);

/* NOTE(review): the definition of abi_memory_info in libaabi.c is inside */
/* an '#if 0' section, so calling it will not link - confirm before use */
void abi_memory_info (char *header);
#endif

View File

@ -0,0 +1,69 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: libbio.h */
/* @desc: bioseq & strings generic library / include file */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : first draft for PWG */
/* ---------------------------------------------------------------- */
#ifndef _H_libbio
#define _H_libbio
/* ==================================================== */
/* Constants */
/* ==================================================== */
#define DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
#define C_DNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZtvghefcdijmlknopqysaabwxrz"
#ifndef TICKS_PER_SEC
#define TICKS_PER_SEC 60
#endif
#define PWG_TIME_NO_RESET TIME_NO_RESET
#define PWG_TIME_RESET TIME_RESET
/* ==================================================== */
/* Macros standards */
/* ==================================================== */
/* Allocation helpers. They expand to malloc/realloc/free, so the including */
/* file must provide <stdlib.h>. They are skipped when NEW is already */
/* defined, so the definitions are kept the same as in the other headers. */
#ifndef NEW
#define NEW(typ) (typ*)malloc(sizeof(typ))
#define NEWN(typ, dim) (typ*)malloc((size_t)(dim) * sizeof(typ))
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (size_t)(dim) * sizeof(typ))
/* no cast: 'Ptr' is not declared by this header and free() takes any */
/* object pointer */
#define FREE(ptr) free(ptr)
#endif
/* old PWG name kept as an alias of the string duplication function */
/* declared below (str_dup_string) */
#define PWG_STRDUP(s) str_dup_string((s), 0)
/* ==================================================== */
/* Prototypes of library functions */
/* ==================================================== */
/* string.c */
char *str_dup_string ( char *str, int extra );
char *str_erase_char ( char *str, int c );
char *str_replace_char ( char *str, int cfrom, int cto );
char *str_trim_trailing ( char *str );
char *str_trim_leading ( char *str );
char *str_pad_right ( char *str, long size, int padchar );
char *str_pad_left ( char *str, long size, int padchar );
char *str_drop_string ( char *str, long start, long nchars );
char *str_insert_string ( char *dst, char *src, long pos );
char *str_extract_string ( char *dst, char *src, long from, long to );
char *str_extract_to_mark ( char *dst, char *src, long start, int markchar );
char *str_reverse_string ( char *str );
char *str_upper_string ( char *str );
char *str_lower_string ( char *str );
/* bioseq.c */
int bio_base_complement ( int c );
char *bio_seq_complement ( char *str );
int bio_codon_translate ( char *codon, int codid);
char *bio_seq_translate ( char *seq, int codid);
#endif

View File

@ -0,0 +1,68 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: libfasta.h */
/* @desc: sequence IO in fasta format / include file */
/* */
/* @history: */
/* @+ <Gloup> : Aug 92 : first version */
/* @+ <Gloup> : Nov 95 : last revised version */
/* ---------------------------------------------------------------- */
#ifndef _H_libfasta
#define _H_libfasta
/* ==================================================== */
/* Constantes */
/* ==================================================== */
#define FASTA_NAMLEN 64 /* max length of seq. name */
#define FASTA_COMLEN 512 /* max length of seq. comment */
#define FASTA_CHAR_PER_LINE 50 /* # of chars per line in output */
/* ==================================================== */
/* Macros standards */
/* ==================================================== */
/* Allocation helpers. They expand to malloc/realloc/free, so the including */
/* file must provide <stdlib.h>. They are skipped when NEW is already */
/* defined, so the definitions are kept the same as in the other headers. */
/* The element count is converted to size_t (not unsigned) so that large */
/* counts are not truncated where size_t is wider than unsigned. */
#ifndef NEW
#define NEW(typ) (typ*)malloc(sizeof(typ))
#define NEWN(typ, dim) (typ*)malloc((size_t)(dim) * sizeof(typ))
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (size_t)(dim) * sizeof(typ))
#define FREE(ptr) free(ptr)
#endif
/* ==================================================== */
/* Structures de donnees */
/* ==================================================== */
/* One sequence record; FastaSequencePtr is the pointer type used by the */
/* functions declared below. */
typedef struct { /* -- Sequence ---------------- */
int ok; /* error flag */
long length, /* length */
offset, /* offset */
bufsize; /* size of current seq buffer */
char name[FASTA_NAMLEN], /* name */
comment[FASTA_COMLEN], /* comment */
*seq; /* sequence */
} FastaSequence, *FastaSequencePtr;
/* ==================================================== */
/* Prototypes (generated by mkproto) */
/* ==================================================== */
/* libfasta.c */
char *GetFastaName ( char *buffer );
char *GetFastaComment ( char *buffer );
FastaSequencePtr NewFastaSequence ( void );
FastaSequencePtr FreeFastaSequence ( FastaSequencePtr seq );
int ReadFastaSequence ( FILE *streamin, FastaSequencePtr seq );
int GetFastaSequence ( FILE *streamin, FastaSequencePtr seq );
void WriteFastaSequence ( FILE *streamou, FastaSequencePtr seq,
int char_per_line );
void RewindFastaDB ( FILE *streamin );
#endif

View File

@ -0,0 +1,31 @@
# ---------------------------------------------------------------
# @pckg: SeqUtils / V1.0 / Gloup Jan 92
#
# @file: Makefile
# @desc: makefile for Abi library
#
# @history:
# @+ <Gloup> : Jan 96 : Created
# @+ <Gloup> : Jul 93 : revised version
# @+ <Gloup> : Feb 01 : Adapted to helixware
# @+ <Gloup> : Mar 01 : Corrected <if> bug
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
include ../../config/auto.conf
# sources and archive name expected by lxlib.targ
OSRC = libaabi.c
LOCLIB = libaabi.a
include ../../config/targets/lxlib.targ
include ../../config/targets/help.targ
# assigned after the includes on purpose: this replaces the INCDIR value
# set by lxlib.targ (../include)
INCDIR = ../../include

View File

@ -0,0 +1,199 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: libaabi.c */
/* @desc: Abi general purpose library */
/* @+ error notification / system functions */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAS_MALLOC_H
#include HAS_MALLOC_H
#endif
#include <errno.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#ifndef errno
extern int errno;
#endif
#include "libaabi.h"
/* sAbortLevel : abi_error() exits when called with a level >= this value */
/* (changed by abi_set_abort_level) */
/* sLastError : value returned by abi_last_error(), zeroed by */
/* abi_clear_error() */
static int sAbortLevel = FATAL_LEVEL,
sLastError = 0;
/* ==================================================== */
/* Error notifications */
/* ==================================================== */
/* -------------------------------------------- */
/* simple error notifier */
/* -------------------------------------------- */
int abi_error(char *filename, int lineno, char *msg, int level)
{
(void) fprintf(stderr, "*%s* [%d] in file %s at line %d / ",
((level > WARNING_LEVEL) ? "Error" : "Warning"),
errno, filename, lineno);
if (errno != 0)
perror(msg);
else
(void) fprintf(stderr,"-\n");
if (level >= sAbortLevel) {
(void) fprintf(stderr,"*Abort*\n");
exit(level);
}
return 0;
}
/* -------------------------------------------- */
/* report last error since clear_error */
/* returns the current value of the file-scope */
/* flag sLastError */
/* -------------------------------------------- */
int abi_last_error()
{
return sLastError;
}
/* -------------------------------------------- */
/* clear error flag */
/* sets the file-scope flag sLastError to 0 */
/* -------------------------------------------- */
void abi_clear_error()
{
sLastError = 0;
}
/* -------------------------------------------- */
/* set up the current abort level */
/* level : abi_error() exits when it is called */
/* with a level >= this value */
/* -------------------------------------------- */
void abi_set_abort_level(int level)
{
sAbortLevel = level;
}
/* -------------------------------------------- */
/* text spinner on stderr                       */
/* each call moves to the next of the four      */
/* characters | / - \ and prints it after a     */
/* carriage return                              */
/* -------------------------------------------- */
void abi_play_rotator()
{
    static char frames[] = "|/-\\";
    static int  current  = 0;

    /* advance first, then draw: the first call shows frames[1] */
    current++;
    if (current >= 4)
        current = 0;

    (void) fprintf(stderr,"\r%c", frames[current]);
}
/* ==================================================== */
/* Cpu time sys calls */
/* ==================================================== */
/* -------------------------------------------- */
/* User cpu time of this process, in seconds,   */
/* counted from the last call made with a       */
/* non-zero 'reset' (from 0 if there was none)  */
/* -------------------------------------------- */
double abi_user_cpu_time(int reset)
{
    static double previous = 0;
    struct rusage usage;
    double current, elapsed;

    (void) getrusage(RUSAGE_SELF, &usage);

    /* seconds + microseconds */
    current = (double) usage.ru_utime.tv_sec
            + ((double) usage.ru_utime.tv_usec / 1000000.);

    elapsed = current - previous;

    if (reset != 0)
        previous = current;

    return elapsed;
}
/* -------------------------------------------- */
/* System cpu time of this process, in seconds, */
/* counted from the last call made with a       */
/* non-zero 'reset' (from 0 if there was none)  */
/* -------------------------------------------- */
double abi_sys_cpu_time(int reset)
{
    static double previous = 0;
    struct rusage usage;
    double current, elapsed;

    (void) getrusage(RUSAGE_SELF, &usage);

    /* seconds + microseconds */
    current = (double) usage.ru_stime.tv_sec
            + ((double) usage.ru_stime.tv_usec / 1000000.);

    elapsed = current - previous;

    if (reset != 0)
        previous = current;

    return elapsed;
}
/* -------------------------------------------- */
/* User / sys / total cpu time as a text line   */
/* 'reset' is passed on to both time functions  */
/* returns a static buffer, overwritten by the  */
/* next call                                    */
/* -------------------------------------------- */
char *abi_str_cpu_time(int reset)
{
    static char text[256];

    double user_time  = abi_user_cpu_time(reset);
    double sys_time   = abi_sys_cpu_time(reset);
    double total_time = user_time + sys_time;

    (void) sprintf(text, "cpu time user: %f sys: %f tot: %f",
                   (float) user_time, (float) sys_time, (float) total_time);

    return text;
}
/* The whole section below is compiled out ('#if 0'), so abi_memory_info */
/* has no definition in this file although libaabi.h declares it. */
/* When enabled it prints every field of the mallinfo() result on stderr, */
/* preceded by a title line built from 'header' (or a default title when */
/* header is NULL). */
#if 0
/* ==================================================== */
/* Memory state (debug) */
/* ==================================================== */
#define PP(fmt, val) (void) fprintf(stderr, fmt, val)
void abi_memory_info(char *header)
{
struct mallinfo info;
info = mallinfo();
if (header)
PP ("--- Memory State at : %s ---\n", header);
else
PP ("--- %s ---\n", "Memory State");
PP ("total space in arena : %d\n", info.arena);
PP ("number of ordinary blocks : %d\n", info.ordblks);
PP ("number of small blocks : %d\n", info.smblks);
PP ("space in holding block head. : %d\n", info.hblkhd);
PP ("number of holding blocks : %d\n", info.hblks);
PP ("space in small blocks in use : %d\n", info.usmblks);
PP ("space in free small blocks : %d\n", info.fsmblks);
PP ("space in ord. blocks in use : %d\n", info.uordblks);
PP ("space in free ord. blocks : %d\n", info.fordblks);
PP ("space penalty if keep option : %d\n", info.keepcost);
}
#undef PP
#endif

View File

@ -0,0 +1,31 @@
# ---------------------------------------------------------------
# @pckg: SeqUtils / V1.0 / Gloup Jan 92
#
# @file: Makefile
# @desc: makefile for Biosrc / BioSequence & Strings library /
#
# @history:
# @+ <Gloup> : Jan 96 : Created
# @+ <Gloup> : Jul 93 : revised version
# @+ <Gloup> : Feb 01 : Adapted to helixware
# @+ <Gloup> : Mar 01 : Corrected <if> bug
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
# project-wide configuration (two levels up)
include ../../config/auto.conf
# C sources of this library
# NOTE(review): OSRC, LOCLIB and INCDIR are presumably consumed by
# lxlib.targ - confirm there
OSRC = string.c \
bioseq.c
# name of the library archive for this directory
LOCLIB = libbio.a
# shared target definitions
include ../../config/targets/lxlib.targ
include ../../config/targets/help.targ
# directory holding the package headers
INCDIR = ../../include

View File

@ -0,0 +1,109 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: bioseq.c */
/* @desc: bio. sequences utility functions */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : quick draft */
/* ---------------------------------------------------------------- */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "libbio.h"
#define GENETIC_CODE_INSTANCE
#include "Genetic.h"
#undef GENETIC_CODE_INSTANCE
/* sNuc / sAnuc are parallel tables: bio_base_complement() looks a */
/* symbol up in sNuc and returns the symbol at the same index in sAnuc */
static char sNuc[] = DNA_ALPHA;
static char sAnuc[] = C_DNA_ALPHA;
/* alphabet accepted in codons by sTranslateCodon(); the position of a */
/* symbol divided by 2 gives its base rank */
/* NOTE(review): presumably upper/lower case pairs - confirm in Genetic.h */
static char sGenNuc[] = GEN_NUC_ALPHA;
/* amino-acid letters, indexed by the values stored in a genetic code table */
static char sGenPro[] = GEN_PRO_ALPHA;
/* base rank -> base-4 digit used to build the codon index; */
/* ranks 3 and 4 share digit 3 (presumably T and U - confirm) */
static int sNucNum[5] = {0, 1, 2, 3, 3};
/* ---------------------------------------------------- */
/* @Function : int bio_base_complement                  */
/* Purpose : return the complement of one base symbol   */
/*                                                      */
/* 'ch' is searched in the sNuc table; when found the   */
/* symbol at the same position of sAnuc is returned,    */
/* otherwise 'ch' is returned unchanged.                */
/* ---------------------------------------------------- */
int bio_base_complement(int ch)
{
    char *hit = strchr(sNuc, ch);

    if (hit == NULL)
        return ch;              /* not a known symbol : keep as is */

    return sAnuc[(int) (hit - sNuc)];
}
/* ---------------------------------------------------- */
/* @Function : char * bio_seq_complement                */
/* Purpose : complement a sequence in place             */
/*                                                      */
/* Every character of 'str' is replaced by the result   */
/* of bio_base_complement(). The order of symbols is    */
/* not changed. 'str' is not checked against NULL.      */
/* Returns 'str'.                                       */
/* ---------------------------------------------------- */
char *bio_seq_complement(char *str)
{
    char *cur = str;

    while (*cur != '\0') {
        *cur = bio_base_complement(*cur);
        cur++;
    }

    return str;
}
/* ---------------------------------------------------- */
/* @Static : int sTranslateCodon                        */
/* Purpose : translate codon -> aa                      */
/*           see bio_codon_translate                    */
/*                                                      */
/* codon : the 3 symbols to translate (read from        */
/*         codon[0..2], stopping at the first bad one)  */
/* code  : genetic code table, indexed by the base-4    */
/*         value of the codon (0..63); entry 64 is      */
/*         used for a bad or incomplete codon           */
/* returns the sGenPro letter selected by the table     */
/* ---------------------------------------------------- */
static int sTranslateCodon(char *codon, int *code)
{
    int i, ch, hash = 0;
    char *p;

    for (i = 0 ; i < 3 ; i++) {
        ch = codon[i];
        /* strchr() also matches the terminating NUL of sGenNuc,  */
        /* so the end of the sequence has to be rejected here :   */
        /* otherwise it would be taken for a base and the loop    */
        /* would go on reading past the end of the string         */
        if ((ch == '\0') || ((p = strchr(sGenNuc, ch)) == NULL)) {
            hash = 64;              /* bad letter in codon */
            break;                  /* or incomplete codon */
        }
        hash = (hash * 4) + sNucNum[((int) (p - sGenNuc)) / 2];
    }

    return (int) sGenPro[code[hash]];
}
/* ---------------------------------------------------- */
/* @Function : int bio_codon_translate                  */
/* Purpose : return amino-acid                          */
/* input:  codon char* 3 bases (in GEN_NUC_ALPHA)       */
/*         codid int   (see Genetic.h)                  */
/* output: aa in one letter code (in GEN_PRO_ALPHA)     */
/*                                                      */
/* note: 'codid' is used directly as an index into      */
/*       theGeneticCode, without any range check.       */
/* ---------------------------------------------------- */
int bio_codon_translate(char *codon, int codid)
{
    int *table = theGeneticCode[codid].code;

    return sTranslateCodon(codon, table);
}
/* ---------------------------------------------------- */
/* @Function : char * bio_seq_translate                 */
/* Purpose : translate sequence to protein, in place    */
/*                                                      */
/* 'seq' is read three symbols at a time and each       */
/* complete codon is overwritten by its amino-acid      */
/* letter; trailing symbols that do not form a full     */
/* codon are dropped.                                   */
/* Returns 'seq', or NULL when 'codid' is outside       */
/* 0 .. GEN_MAX_CODES - 1.                              */
/* ---------------------------------------------------- */
char* bio_seq_translate(char *seq, int codid)
{
    int  *table;
    char *in, *out;

    if (! ((codid >= 0) && (codid < GEN_MAX_CODES)))
        return NULL;

    table = theGeneticCode[codid].code;

    in = out = seq;
    while (in[0] && in[1] && in[2]) {
        *out++ = sTranslateCodon(in, table);
        in += 3;
    }
    *out = '\000';

    return seq;
}

View File

@ -0,0 +1,359 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: string.c */
/* @desc: some strings utility functions */
/* */
/* @history: */
/* @+ <Hdp> : Jan 94 : first version from abistr.c */
/* @+ <Gloup> : Feb 94 : cleaned and speedup */
/* ---------------------------------------------------------------- */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libaabi.h"
#include "libbio.h"
/* ---------------------------------------------------- */
/* @Function : char *str_dup_string                     */
/* Purpose : allocate a copy of 'str' with 'extra'      */
/*           additional bytes of room after it          */
/*                                                      */
/* Returns the new buffer, 'str' itself when it is      */
/* NULL, or NULL (after calling MemoryError()) when     */
/* the allocation fails.                                */
/* ---------------------------------------------------- */
char *str_dup_string(char *str, int extra)
{
    char *copy;

    if (str == NULL)
        return str;

    copy = NEWN(char, (strlen(str) + extra + 1));
    if (copy == NULL) {
        MemoryError();
        return NULL;
    }

    return strcpy(copy, str);
}
/* ---------------------------------------------------- */
/* @Function : char *str_erase_char                     */
/* Purpose : remove every occurrence of char 'c' from   */
/*           'str', in place                            */
/* Returns 'str' (NULL is passed through).              */
/* ---------------------------------------------------- */
char *str_erase_char(char *str, int c)
{
    char *rd, *wr;

    if (str == NULL)
        return str;

    wr = str;
    for (rd = str ; *rd != '\0' ; rd++) {
        if (*rd != c)
            *wr++ = *rd;        /* keep : compact to the left */
    }
    *wr = '\000';

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_replace_char                   */
/* Purpose : replace all occurrences of 'cfrom' by      */
/*           'cto' in 'str', in place                   */
/* Returns 'str' (NULL is passed through).              */
/* ---------------------------------------------------- */
char *str_replace_char(char *str, int cfrom, int cto)
{
    char *cur;

    if (str == NULL)
        return str;

    cur = str;
    while (*cur != '\0') {
        if (*cur == cfrom)
            *cur = (char) cto;
        cur++;
    }

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_trim_trailing                  */
/* Purpose : Trim trailing spaces from 'str', in place  */
/*           ("space" as defined by isspace())          */
/* Returns 'str' (NULL is passed through).              */
/* ---------------------------------------------------- */
char *str_trim_trailing(char *str)
{
    size_t len;

    if (! str)
        return str;

    /* work with a length rather than a pointer walking backwards, */
    /* so that no pointer before 'str' is ever formed (empty or    */
    /* all-space strings)                                          */
    len = strlen(str);
    while ((len > 0) && isspace((unsigned char) str[len - 1]))
        len--;                  /* cast : isspace() needs an unsigned char value */
    str[len] = '\000';

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_trim_leading                   */
/* Purpose : Trim leading spaces from 'str', in place   */
/*           ("space" as defined by isspace())          */
/* The remaining text is shifted to the start of the    */
/* buffer. Returns 'str' (NULL is passed through).      */
/* ---------------------------------------------------- */
char *str_trim_leading(char *str)
{
    char *first;

    if (! str)
        return str;

    /* cast : isspace() needs an unsigned char value ; the scan */
    /* stops on the terminator, which is not a space            */
    for (first = str ; isspace((unsigned char) *first) ; first++)
        /* nop */ ;

    if (first != str)           /* overlapping move, terminator included */
        (void) memmove(str, first, strlen(first) + 1);

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_pad_right                      */
/* Purpose : Pad 'str' with char 'padchar' on the right */
/*           to build a string of length 'size'         */
/*                                                      */
/* Nothing is done when 'str' is NULL or already at     */
/* least 'size' characters long. The caller's buffer    */
/* must have room for size + 1 bytes.                   */
/* Returns 'str'.                                       */
/* ---------------------------------------------------- */
char *str_pad_right(char *str, long size, int padchar)
{
    long cur;

    if (str == NULL)
        return str;

    cur = strlen(str);
    if (cur >= size)
        return str;

    (void) memset(str + cur, padchar, (size_t) (size - cur));
    str[size] = '\000';

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_pad_left                       */
/* Purpose : Pad 'str' with char 'padchar' on the left  */
/*           to build a string of length 'size'         */
/*                                                      */
/* Nothing is done when 'str' is NULL or already at     */
/* least 'size' characters long. The caller's buffer    */
/* must have room for size + 1 bytes.                   */
/* Returns 'str'.                                       */
/* ---------------------------------------------------- */
char *str_pad_left(char *str, long size, int padchar)
{
    long cur, shift;

    if (str == NULL)
        return str;

    cur = strlen(str);
    if (cur >= size)
        return str;

    shift = size - cur;
    /* slide the text (and its terminator) to the right end ... */
    (void) memmove(str + shift, str, (size_t) cur + 1);
    /* ... then fill the gap opened at the front */
    (void) memset(str, padchar, (size_t) shift);

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_drop_string                    */
/* Purpose : Delete 'nchars' chars from 'str', starting */
/*           at 'start' (0-based), in place             */
/*                                                      */
/* The request is clipped to the end of the string.     */
/* 'str' is returned unchanged when it is NULL, when    */
/* 'start' is negative or not inside the string, or     */
/* when 'nchars' is not positive.                       */
/* ---------------------------------------------------- */
char *str_drop_string(char *str, long start, long nchars)
{
    long len;

    /* negative values would address memory outside the string */
    if ((! str) || (start < 0) || (nchars <= 0))
        return str;

    len = (long) strlen(str);
    if (start >= len)
        return str;

    /* clip without computing start + nchars, which could overflow */
    if (nchars > (len - start))
        nchars = len - start;

    /* close the gap ; the terminator is moved with the tail */
    (void) memmove(str + start, str + start + nchars,
                   (size_t) (len - start - nchars) + 1);

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_insert_string                  */
/* Purpose : Insert 'src' into 'dst', at position 'pos' */
/*           (0-based), in place                        */
/*                                                      */
/* 'dst' is returned unchanged when either string is    */
/* NULL, when 'src' is empty, or when 'pos' is not      */
/* strictly inside 'dst' (so appending at the very end  */
/* is not performed). The caller's buffer must have     */
/* room for the grown string. 'pos' is not checked      */
/* against negative values.                             */
/* ---------------------------------------------------- */
char *str_insert_string(char *dst, char *src, long pos)
{
    long inslen, curlen, i;
    char *gap;

    if ((src == NULL) || (dst == NULL))
        return dst;

    inslen = strlen(src);
    if (inslen == 0)
        return dst;

    curlen = strlen(dst);
    if (curlen <= pos)
        return dst;

    /* open a gap of 'inslen' chars at 'pos' (terminator moves too) */
    gap = dst + pos;
    (void) memmove(gap + inslen, gap, (size_t) (curlen - pos) + 1);

    /* fill the gap with the inserted text */
    for (i = 0 ; i < inslen ; i++)
        gap[i] = src[i];

    return dst;
}
/* ---------------------------------------------------- */
/* @Function : char *str_extract_to_mark                */
/* Purpose : Put into 'dst' the substring of 'src'      */
/*           starting at 'start' (0-based) and ending   */
/*           just before the first 'markchar' (or at    */
/*           the end of the string)                     */
/*                                                      */
/* A 'start' beyond the end yields an empty string.     */
/* 'start' is not checked against negative values.      */
/* Returns 'dst' (untouched if either pointer is NULL). */
/* ---------------------------------------------------- */
char *str_extract_to_mark(char *dst, char *src, long start, int markchar)
{
    long srclen;
    char *in, *out;

    if ((src == NULL) || (dst == NULL))
        return dst;

    srclen = strlen(src);
    in  = src + ((start > srclen) ? srclen : start);
    out = dst;

    while ((*in != '\0') && (*in != markchar))
        *out++ = *in++;
    *out = '\000';

    return dst;
}
/* ---------------------------------------------------- */
/* @Function : char *str_extract_string                 */
/* Purpose : Copy into 'dst' the substring of 'src'     */
/*           from position 'from' up to position 'to'   */
/*           included (0-based), or up to the end of    */
/*           the string                                 */
/*                                                      */
/* Both positions are clipped to the length of 'src';   */
/* 'to' < 'from' yields an empty string. Negative       */
/* positions are not checked.                           */
/* Returns 'dst' (untouched if either pointer is NULL). */
/* ---------------------------------------------------- */
char *str_extract_string (char *dst, char *src, long from, long to)
{
    long srclen;
    char *cur, *last, *out;

    if ((src == NULL) || (dst == NULL))
        return dst;

    srclen = strlen(src);
    if (from > srclen)
        from = srclen;
    if (to > srclen)
        to = srclen;

    last = src + to;            /* last position to copy (inclusive) */
    out  = dst;
    for (cur = src + from ; (*cur != '\0') && (cur <= last) ; cur++)
        *out++ = *cur;
    *out = '\000';

    return dst;
}
/* ---------------------------------------------------- */
/* @Function : char *str_reverse_string                 */
/* Purpose : reverse symbol order in string 'str',      */
/*           in place                                   */
/* Returns 'str' (NULL is passed through).              */
/* ---------------------------------------------------- */
char *str_reverse_string(char *str)
{
    size_t lo, hi;
    char   tmp;

    if (! str)
        return str;

    /* indices are used instead of a "last char" pointer so that */
    /* nothing before 'str' is addressed for an empty string     */
    hi = strlen(str);
    for (lo = 0 ; lo + 1 < hi ; lo++) {
        hi--;
        tmp     = str[lo];
        str[lo] = str[hi];
        str[hi] = tmp;
    }

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_upper_string                   */
/* Purpose : uppercase string 'str', in place           */
/*                                                      */
/* Only the letters 'a'..'z' are converted, by plain    */
/* character arithmetic (no locale involved).           */
/* Returns 'str' (NULL is passed through).              */
/* ---------------------------------------------------- */
char *str_upper_string(char *str)
{
    char *cur;

    if (str == NULL)
        return str;

    cur = str;
    while (*cur != '\0') {
        if ((*cur >= 'a') && (*cur <= 'z'))
            *cur = *cur - 'a' + 'A';
        cur++;
    }

    return str;
}
/* ---------------------------------------------------- */
/* @Function : char *str_lower_string                   */
/* Purpose : lowercase string 'str', in place           */
/*                                                      */
/* Only the letters 'A'..'Z' are converted, by plain    */
/* character arithmetic (no locale involved).           */
/* Returns 'str' (NULL is passed through).              */
/* ---------------------------------------------------- */
char *str_lower_string(char *str)
{
    char *cur;

    if (str == NULL)
        return str;

    cur = str;
    while (*cur != '\0') {
        if ((*cur >= 'A') && (*cur <= 'Z'))
            *cur = *cur - 'A' + 'a';
        cur++;
    }

    return str;
}

View File

@ -0,0 +1,2 @@
GATCCATTGATAAGGGTTGATAAGGAATGATTCTTGGATCGTACCAACCT
TTATCTGCGGCATAGTCCATTACAAACATGTGGTCTGTAAACACTCTTCC

View File

@ -0,0 +1,140 @@
[read] 100 symbols
GATCCATTGA
TAAGGGTTGA
TAAGGAATGA
TTCTTGGATC
GTACCAACCT
TTATCTGCGG
CATAGTCCAT
TACAAACATG
TGGTCTGTAA
ACACTCTTCC
[pwg_trim_leading]
GATCCA
TTGATAAGGG
TTGATAAGGA
ATGATTCTTG
GATCGTACCA
ACCTTTATCT
GCGGCATAGT
CCATTACAAA
CATGTGGTCT
GTAAACACTC
TTCC
GATCCATTGA
TAAGGGTTGA
TAAGGAATGA
TTCTTGGATC
GTACCAACCT
TTATCTGCGG
CATAGTCCAT
TACAAACATG
TGGTCTGTAA
ACACTCTTCC
[str_trim_trailing]
GATCCATTGA
TAAGGGTTGA
TAAGGAATGA
TTCTTGGATC
GTACCAACCT
TTATCTGCGG
CATAGTCCAT
TACAAACATG
TGGTCTGTAA
ACACTCTTCC
GATCCATTGA
TAAGGGTTGA
TAAGGAATGA
TTCTTGGATC
GTACCAACCT
TTATCTGCGG
CATAGTCCAT
TACAAACATG
TGGTCTGTAA
ACACTCTTCC
[str_pad_right]
GATCCATTGA
TAAGGGTTGA
TAAGGAATGA
TTCTTGGATC
GTACCAACCT
TTATCTGCGG
CATAGTCCAT
TACAAACATG
TGGTCTGTAA
ACACTCTTCC
xxxxxxxxxx
xxxxxxxxxx
xxxxxxxxxx
xxxxxxxxxx
xxxxxxxxxx
[str_pad_left]
xxxxxxxxxx
xxxxxxxxxx
xxxxxxxxxx
xxxxxxxxxx
xxxxxxxxxx
GATCCATTGA
TAAGGGTTGA
TAAGGAATGA
TTCTTGGATC
GTACCAACCT
TTATCTGCGG
CATAGTCCAT
TACAAACATG
TGGTCTGTAA
ACACTCTTCC
[str_drop_string]
GATCCATTGA
GTTGATAAGG
AATGATTCTT
GGATCGTACC
AACCTTTATC
TGCGGCATAG
TCCATTACAA
ACATGTGGTC
TGTAAACACT
CTTCC
[str_insert_string]
GATCCATTGA
xxxxxTAAGG
GTTGATAAGG
AATGATTCTT
GGATCGTACC
AACCTTTATC
TGCGGCATAG
TCCATTACAA
ACATGTGGTC
TGTAAACACT
CTTCC
[str_extract_string]
TAAGGGTTGA
T
[str_extract_to_mark]
TAAGGGTTGA
TAAGGAATGA
TT
[bio_seq_complement]
CTAGGTAACT
ATTCCCAACT
ATTCCTTACT
AAGAACCTAG
CATGGTTGGA
AATAGACGCC
GTATCAGGTA
ATGTTTGTAC
ACCAGACATT
TGTGAGAAGG
[str_reverse_string]
CCTTCTCACA
AATGTCTGGT
GTACAAACAT
TACCTGATAC
GGCGTCTATT
TCCAACCATG
CTAGGTTCTT
AGTAAGGAAT
AGTTGGGAAT
AGTTACCTAG
[end of test]

View File

@ -0,0 +1,105 @@
/* ---------------- */
/* @file: ctest.c */
/* ---------------- */
#include <stdio.h>
#include <stdlib.h>
#include "libbio.h"
#define BUFFER_SIZE 1024
#define CPL 10
#define PRINT(s) sPrintSequence(stdout, (s), CPL);
/* ---------------------------- */
/* Print 'seq' on 'fd', broken  */
/* into lines of 'nchars'       */
/* symbols; the output always   */
/* ends with a newline.         */
/* ---------------------------- */
static void sPrintSequence(FILE *fd, char *seq, int nchars)
{
    long written = 0;

    while (*seq != '\0') {
        /* start a new line every 'nchars' symbols, but not before the first */
        if ((written != 0) && ((written % nchars) == 0))
            fputc('\n', fd);
        fputc(*seq, fd);
        seq++;
        written++;
    }
    fputc('\n', fd);
}
/* ---------------------------- */
/* Test driver: reads a         */
/* sequence from stdin (all     */
/* lines concatenated), then    */
/* prints the result of each    */
/* library function on a copy   */
/* of it.                       */
/* Exit status: 0 when the run  */
/* completes, 1 when the input  */
/* is too long for the fixed    */
/* work buffers.                */
/* ---------------------------- */
int main(void)
{
    long   len, grow;
    size_t used, n;
    char   line[256], seq[BUFFER_SIZE], sseq[BUFFER_SIZE];

    /* read with fgets() (bounded) instead of gets(), and strip the */
    /* newline that fgets() keeps so that lines are simply joined   */
    *seq = '\000';
    used = 0;
    while (fgets(line, sizeof(line), stdin)) {
        n = strlen(line);
        if (n && (line[n - 1] == '\n'))
            line[--n] = '\000';
        if (used + n >= sizeof(seq)) {
            fprintf(stderr, "input sequence too long (max %d symbols)\n",
                    BUFFER_SIZE - 1);
            return 1;
        }
        strcat(seq, line);
        used += n;
    }
    len = (long) used;

    /* the tests below grow the copy by up to len/2 pad chars */
    /* (or 5 inserted chars) : make sure this fits in 'sseq'  */
    grow = ((len / 2) > 5) ? (len / 2) : 5;
    if (len + grow >= BUFFER_SIZE) {
        fprintf(stderr, "input sequence too long for the padding tests\n");
        return 1;
    }

    printf ("[read] %ld symbols\n", len);
    PRINT(seq);

    printf("[pwg_trim_leading]\n");
    strcpy(sseq, " ");
    strcat(sseq, seq);
    PRINT(sseq);
    str_trim_leading(sseq);
    PRINT(sseq);

    printf("[str_trim_trailing]\n");
    strcpy(sseq, seq);
    strcat(sseq, " ");
    PRINT(sseq);
    str_trim_trailing(sseq);
    PRINT(sseq);

    printf("[str_pad_right]\n");
    strcpy(sseq, seq);
    str_pad_right(sseq, len + len/2, 'x');
    PRINT(sseq);

    printf("[str_pad_left]\n");
    strcpy(sseq, seq);
    str_pad_left(sseq, len + len/2, 'x');
    PRINT(sseq);

    printf("[str_drop_string]\n");
    strcpy(sseq, seq);
    str_drop_string(sseq, 10, 5);
    PRINT(sseq);

    printf("[str_insert_string]\n");
    strcpy(sseq, seq);
    str_insert_string(sseq, "xxxxx", 10);
    PRINT(sseq);

    printf("[str_extract_string]\n");
    str_extract_string(sseq, seq, 10, 20);
    PRINT(sseq);

    printf("[str_extract_to_mark]\n");
    str_extract_to_mark(sseq, seq, 10, 'C');
    PRINT(sseq);

    printf("[bio_seq_complement]\n");
    strcpy(sseq, seq);
    bio_seq_complement (sseq);
    PRINT(sseq);

    printf("[str_reverse_string]\n");
    strcpy(sseq, seq);
    str_reverse_string(sseq);
    PRINT(sseq);

    printf("[end of test]\n");
    return 0;
}

View File

@ -0,0 +1,31 @@
# ---------------------------------------------------------------
# @pckg: SeqUtils / V1.0 / Gloup Jan 92
#
# @file: Makefile
# @desc: makefile for Fasta / Fasta format IO/
#
# @history:
# @+ <Gloup> : Jan 92 : Created (Bip version)
# @+ <Gloup> : Nov 97 : Standalone version
# @+ <Gloup> : Feb 01 : Adapted to helixware
# @+ <Gloup> : Mar 01 : Corrected <if> bug
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
# project-wide configuration (two levels up)
include ../../config/auto.conf
# C source of this library
# NOTE(review): OSRC, LOCLIB and INCDIR are presumably consumed by
# lxlib.targ - confirm there
OSRC = libfasta.c
# name of the library archive for this directory
LOCLIB = libfasta.a
# shared target definitions
include ../../config/targets/lxlib.targ
include ../../config/targets/help.targ
# directory holding the package headers
INCDIR = ../../include

View File

@ -0,0 +1,373 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: libfasta.c */
/* @desc: sequence IO in fasta format */
/* */
/* @history: */
/* @+ <Gloup> : Aug 92 : first version */
/* @+ <Gloup> : Nov 95 : last revised version */
/* ---------------------------------------------------------------- */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libfasta.h"
/* SECURE selects, at compile time, how sequence lines are copied : */
/* with 1 only alphabetic characters are kept, with 0 lines are */
/* copied as they are */
#define SECURE 0 /* 1 : secure copy ; 0 : quickest copy */
/* values for the 'retain' argument of sNextIOBuffer() */
#define PUSH_BACK 1
#define READ_NEXT 0
/* values for the 'serial' argument of sNextIOBuffer() */
#define SERIAL 1
#define INDEXED 0
/* end-of-line character stripped on input and written on output */
#ifdef MACINTOSH
#define LINE_FEED '\r'
#else
#define LINE_FEED '\n'
#endif
/* 'retain' value of the previous sNextIOBuffer() call : non-zero */
/* means the current line was pushed back and must be served again */
static int sRetained = 0;
/* -------------------------------------------- */
/* @static: buffered line reading               */
/*                                              */
/* Returns the next non-empty line of           */
/* 'streamin', without its trailing LINE_FEED,  */
/* in a static buffer (overwritten by the next  */
/* call), or NULL at end of file.               */
/*                                              */
/* retain : PUSH_BACK -> keep the current line  */
/*          so that the next serial call gets   */
/*          it again ; READ_NEXT -> normal read */
/* serial : SERIAL -> honour pushed-back lines ;*/
/*          INDEXED -> always read the stream   */
/*                                              */
/* Lines longer than the buffer are returned in */
/* several pieces.                              */
/* -------------------------------------------- */
static char * sNextIOBuffer(FILE *streamin, int retain, int serial)
{
    static char sBuffer[BUFSIZ];    /* BUFSIZ in <stdio.h> */
    int    reused;
    size_t len;

    /* serve the buffered line again if it is being pushed back */
    /* now, or was pushed back by the previous call             */
    reused = (retain || sRetained) && serial;
    sRetained = retain;

    /* loop (rather than recurse) over blank lines, so that a long */
    /* run of them cannot exhaust the stack                        */
    for (;;) {
        if ((! reused) && (! fgets(sBuffer, sizeof(sBuffer), streamin)))
            return NULL;                        /* end of file */

        /* remove trailing line feed ; the length test keeps us */
        /* inside the buffer when the line is empty             */
        len = strlen(sBuffer);
        if (len && (sBuffer[len - 1] == LINE_FEED))
            sBuffer[len - 1] = '\000';

        if (*sBuffer || reused)
            return sBuffer;
    }
}
#if SECURE
/* -------------------------------------------- */
/* count the alphabetic characters (isalpha)    */
/* found in a NUL-terminated buffer             */
/* -------------------------------------------- */
static long sCountAlpha(char *buf)
{
    long count;

    for (count = 0 ; *buf ; buf++) {
        /* cast : isalpha() needs an unsigned char value */
        if (isalpha((unsigned char) *buf))
            count++;
    }

    return count;
}
#endif
#if SECURE
/* -------------------------------------------- */
/* copy only alpha chars (isalpha) from s2 to   */
/* s1 and terminate s1                          */
/* returns a pointer to the terminator written  */
/* in s1 (not to the start of s1)               */
/* -------------------------------------------- */
static char *sStrcpyAlpha(char *s1, char *s2)
{
    for( ; *s2 ; s2++) {
        /* cast : isalpha() needs an unsigned char value */
        if (isalpha((unsigned char) *s2))
            *s1++ = *s2;
    }
    *s1 = '\000';

    return s1;
}
#endif
/* -------------------------------------------- */
/* find the first space (isspace) in buffer     */
/* returns a pointer to it, or NULL when the    */
/* buffer holds no space                        */
/* -------------------------------------------- */
static char *sNextSpace(char *buffer)
{
    for (; *buffer ; buffer++) {
        /* cast : isspace() needs an unsigned char value */
        if (isspace((unsigned char) *buffer))
            return buffer;
    }

    return NULL;
}
/* -------------------------------------------- */
/* returns sequence name (FASTA)                */
/*                                              */
/* buffer : a header line ; its first character */
/*          (the '>') is skipped                */
/* The name is the first blank-delimited word   */
/* found within the first FASTA_NAMLEN chars of */
/* the line, or "<no Name>" when there is none. */
/* 'buffer' is only read, never modified.       */
/* Returns a static buffer, overwritten by the  */
/* next call.                                   */
/* -------------------------------------------- */
char *GetFastaName(char *buffer)
{
    static char name[FASTA_NAMLEN];
    size_t pos, len = 0;

    if (buffer && *buffer) {
        /* skip blanks after the first char ; every test on 'pos' */
        /* also stops at the end of the string, so nothing beyond */
        /* the terminator is touched                              */
        pos = 1;
        while ((pos < (size_t) FASTA_NAMLEN)
               && isspace((unsigned char) buffer[pos]))
            pos++;

        /* copy the word ; at most FASTA_NAMLEN - 1 chars fit */
        while ((pos < (size_t) FASTA_NAMLEN) && buffer[pos]
               && ! isspace((unsigned char) buffer[pos]))
            name[len++] = buffer[pos++];
    }
    name[len] = '\000';

    if (len == 0)
        (void) strcpy(name, "<no Name>");

    return name;
}
/* -------------------------------------------- */
/* returns sequence comment (FASTA)             */
/*                                              */
/* The comment is the text that follows the     */
/* first space of the header line, limited to   */
/* the first FASTA_COMLEN chars of the line, or */
/* "<no comment>" when no space is found there. */
/* 'buffer' is only read, never modified.       */
/* Returns a static buffer, overwritten by the  */
/* next call.                                   */
/* -------------------------------------------- */
char *GetFastaComment(char *buffer)
{
    static char comment[FASTA_COMLEN];
    char   *space;
    size_t room, len;

    space = (buffer ? sNextSpace(buffer) : NULL);

    /* the space must lie within the first FASTA_COMLEN chars */
    if (space && ((size_t) (space - buffer) < (size_t) FASTA_COMLEN)) {
        /* chars available between the space and the cut-off */
        room = (size_t) FASTA_COMLEN - 1 - (size_t) (space - buffer);
        len  = strlen(space + 1);
        if (len > room)
            len = room;
        (void) memcpy(comment, space + 1, len);
        comment[len] = '\000';
    }
    else {
        (void) strcpy(comment, "<no comment>");
    }

    return comment;
}
/* -------------------------------------------- */
/* release a sequence                           */
/*                                              */
/* Frees the symbol buffer (if any) and the     */
/* structure itself ; a NULL 'seq' is accepted. */
/* Always returns NULL.                         */
/* -------------------------------------------- */
FastaSequencePtr FreeFastaSequence(FastaSequencePtr seq)
{
    if (! seq)
        return NULL;

    if (seq->seq) {
        FREE(seq->seq);
    }
    FREE(seq);

    return NULL;
}
/* -------------------------------------------- */
/* allocate a sequence                          */
/*                                              */
/* Returns a new, empty sequence : length 0,    */
/* offset -1, empty name and comment, ok flag   */
/* set, and a symbol buffer of BUFSIZ bytes.    */
/* Returns NULL when an allocation fails.       */
/* -------------------------------------------- */
FastaSequencePtr NewFastaSequence()
{
    FastaSequencePtr fresh = NEW(FastaSequence);

    if (fresh == NULL)
        return NULL;

    fresh->length = 0;
    fresh->offset = -1;

    fresh->seq = NEWN(char, BUFSIZ);
    if (fresh->seq == NULL)
        return FreeFastaSequence(fresh);    /* releases 'fresh', yields NULL */

    fresh->bufsize = BUFSIZ;
    *(fresh->name) = '\000';
    *(fresh->comment) = '\000';
    fresh->ok = 1;

    return fresh;
}
/* -------------------------------------------- */
/* read a sequence in Fasta format, growing its */
/* symbol buffer as needed                      */
/* <Serial reading>                             */
/*                                              */
/* Reads the next entry of 'streamin' : header  */
/* line (name, comment) then every line up to   */
/* the next header, which is pushed back for    */
/* the following call. seq->offset receives the */
/* position computed for the header line.       */
/*                                              */
/* returns : 0 -> no more to read (end of file  */
/*                or line not starting by '>')  */
/*           1 -> read ok                       */
/* <but> you must check seq->ok == 1            */
/*       ( seq->ok == 0 => memory error)        */
/* -------------------------------------------- */
int ReadFastaSequence(FILE *streamin, FastaSequencePtr seq)
{
    long chunk, total;
    char *line, *grown;

    seq->ok = 0;                            /* assume error */
    seq->length = 0;
    total = 0;

    seq->offset = ftell(streamin);

    line = sNextIOBuffer(streamin, READ_NEXT, SERIAL);
    if ((line == NULL) || (*line != '>'))   /* sync error */
        return 0;                           /* last sequence */

    /* a non-zero position is taken to be just past the header */
    /* line : step back over it and its line feed              */
    if (seq->offset != 0)
        seq->offset -= (strlen(line) + 1);

    (void) strcpy(seq->name, GetFastaName(line));
    (void) strcpy(seq->comment, GetFastaComment(line));

    for (;;) {
        line = sNextIOBuffer(streamin, READ_NEXT, SERIAL);
        if (line == NULL)
            break;

        if (*line == '>') {                 /* next entry : push it back */
            (void) sNextIOBuffer(streamin, PUSH_BACK, SERIAL);
            break;
        }

#if SECURE
        chunk = sCountAlpha(line);
#else
        chunk = strlen(line);
#endif
        total += chunk;

        if (total >= seq->bufsize) {        /* grow, keeping room for the NUL */
            grown = REALLOC(char, seq->seq, 2 * total + 1);
            if (grown == NULL)
                return 1;                   /* but seq->ok is 0 */
            seq->seq = grown;
            seq->bufsize = 2 * total + 1;
        }

#if SECURE
        sStrcpyAlpha(seq->seq + seq->length, line);
#else
        (void) memcpy(seq->seq + seq->length, line, chunk);
#endif
        seq->length = total;
    }

    seq->seq[seq->length] = '\000';

    return (seq->ok = 1);
}
/* -------------------------------------------- */
/* read a sequence in Fasta format, growing its */
/* symbol buffer as needed                      */
/* <Indexed reading>                            */
/*                                              */
/* Seeks 'streamin' to seq->offset, which must  */
/* be the position of a header line, then reads */
/* the header (name, comment) and every line up */
/* to the next header or the end of file.       */
/* seq->offset is left untouched, so the same   */
/* entry can be fetched again.                  */
/*                                              */
/* returns : 0 -> error : no header line at     */
/*                seq->offset, or memory error  */
/*           1 -> read ok                       */
/* (seq->ok holds the same value on return)     */
/* -------------------------------------------- */
int GetFastaSequence(FILE *streamin, FastaSequencePtr seq)
{
    long readlen, buflen;
    char *buffer, *tbuf;

    seq->ok = 0;                                /* assume error */
    buflen = seq->length = 0;

    (void) fseek(streamin, seq->offset, SEEK_SET);

    buffer = sNextIOBuffer(streamin, READ_NEXT, INDEXED);
    if (! (buffer && (*buffer == '>')))         /* sync error */
        return 0;

    /* unlike the serial reader, the offset is an input here and */
    /* already designates the header : it must not be adjusted   */

    (void) strcpy(seq->name, GetFastaName(buffer));
    (void) strcpy(seq->comment, GetFastaComment(buffer));

    while ((buffer = sNextIOBuffer(streamin, READ_NEXT, INDEXED)) != NULL) {

        if (*buffer == '>')                     /* next entry reached */
            break;

#if SECURE
        readlen = sCountAlpha(buffer);
#else
        readlen = strlen(buffer);
#endif
        buflen += readlen;

        if (buflen >= seq->bufsize) {           /* grow, keeping room for the NUL */
            if (! (tbuf = REALLOC(char, seq->seq, 2 * buflen + 1)))
                return 0;
            seq->seq = tbuf;
            seq->bufsize = 2 * buflen + 1;
        }

#if SECURE
        sStrcpyAlpha(seq->seq + seq->length, buffer);
#else
        (void) memcpy(seq->seq + seq->length, buffer, readlen);
#endif
        seq->length = buflen;
    }

    seq->seq[seq->length] = '\000';

    return (seq->ok = 1);
}
/* -------------------------------------------- */
/* write a sequence in Fasta format             */
/*                                              */
/* Writes the header line ('>', name, space,    */
/* comment ; placeholders are used for empty    */
/* fields) then the seq->length symbols of the  */
/* sequence, 'char_per_line' symbols per line.  */
/* A 'char_per_line' <= 0 writes the sequence   */
/* on a single line.                            */
/* The sequence buffer is only read.            */
/* -------------------------------------------- */
void WriteFastaSequence(FILE *streamou, FastaSequencePtr seq,
                        int char_per_line)
{
    long remaining, width, chunk;
    char *cursor;

    (void) fputc('>', streamou);
    (void) fputs((*(seq->name) ? seq->name : "<no name>") , streamou);
    (void) fputc(' ', streamou);
    (void) fputs((*(seq->comment) ? seq->comment : "<no comment>"), streamou);
    (void) fputc(LINE_FEED, streamou);

    remaining = seq->length;
    /* guard against a zero or negative line width (division by zero) */
    width = (char_per_line > 0) ? (long) char_per_line : remaining;

    /* emit each slice directly, without patching the buffer */
    for (cursor = seq->seq ; remaining > 0 ; cursor += chunk, remaining -= chunk) {
        chunk = (remaining < width) ? remaining : width;
        (void) fwrite(cursor, 1, (size_t) chunk, streamou);
        (void) fputc(LINE_FEED , streamou);
    }
}
/* -------------------------------------------- */
/* rewind db file                               */
/*                                              */
/* Repositions 'streamin' (when not NULL) at    */
/* its beginning and drops any line that was    */
/* pushed back by the serial reader.            */
/* -------------------------------------------- */
void RewindFastaDB(FILE *streamin)
{
    if (streamin != NULL)
        rewind(streamin);

    sRetained = 0;              /* forget previous buffer */
}

View File

@ -0,0 +1,202 @@
>AHTRNRHL A. hydrophila DNA for tRNA-Arg, tRNA-His, tRNA-Leu and tRNA-Pro
CGATGGAAAAACAAGCGGTTGATTCTGCGAGATAAAAAAATAGTAGACAAGGTGGGTCCC
CATCATTAATATTGCGCCCCGTTCGACAGCGTAGCGCCCGTAGCTCAGCTGGATAGAGCG
CTGCCCTCCGGAGGCAGAGGTCACAGGTTCGAATCCTGTCGGGCGCACCATCAAAAGTGC
GCCGGTTAAGCGGGCGAGTTGAAGAACGAAAACAGCTGTGGTGGCTGTAGCTCAGTTGGT
AGAGTCCCGGATTGTGATTCCGGTTGTCGTGGGTTCGAGCCCCATCAGCCACCCCATTTT
ACAGCTTGTCAGGTATGCGAAGGTGGCGGAATTGGTAGACGCGCTAGCTTCAGGTGTTAG
TGCCCCCCGGGTGTGAGGGTTCGAGTCCCTCTCTTCGCACCATACTTGCTGTATGACGAG
GATGACCGTTCATGGTCGTCTTTGTGTTTTGAAAAGAAAGCTTTTTTAAAGCCTCGGTGA
TTAGCGCAGCCCGGTAGCGCATCTGGTTTGGGACCAGAGGGTCAAAGGTTCGAATCCTTT
ATCACCGACCACATTCTGAAAAACCCCGCTCAGGCGGGGTTTTTTGCTTTCTGTCGTCTG
CCAGCCTGCTGCCGCAGGTTT
>ALTRNA11 A.laidlawii 23S rRNA, 5S rRNA, tRNA-Val, tRNA-Thr, tRNA-Lys, tRNA-Leu, tRNA-Ala, tRNA-Met (elongator), tRNA-Ile, tRNA-Ser, tRNA-Met (initiator), tRNA-Asp & tRNA-Phe genes
CAGTACTACAGCTGGGTAGCTATGTGCGGAAGGGATAAACGCTGAAAGCATCTAAGCGTG
AAGCCCCCCTTAAGATGAGATTTCCCAATTAGTAAGACCCCTCAAAGACTATGAGGTTGA
TAGGCCAGGTGTGTAAGTACAGCGATGTATTCAGCTTACTGGTACTAATAGGTCGAGGAC
TTAACCTTTCGTGATGATTCGCATCACATTCTTTTCAGAGAAAATCATTTATCTAGTTTT
GAGAGCATTATGTCTGGTGACGATGGCAAGATGGTTCCACCTGTTCCCATCCCGAACACA
GAAGTTAAGCATCTTAGCGCCGACGATAGTTATTACAATTAGCGAAAATAGGACGTTGCC
AGGCATATCATCTCAATTTTATACCATTTTTTGGAGGATTAGCTCAGTTGGGAGAGCACC
TGCCTTACAAGCAGGGGGTCGGCGGTTCAAGCCCGTCATCCTCCACCACTTTACAAGCCG
AAATAGCTCAATCGGTAGAGCAACTGATTTGTAATCAGTAGGTTGCGGGTTCAATTCCTG
TTTTCGGCACCACTACAATGTCCCGTTAGCTCAGGTGGTAGAGCACTTGACTTTTAATCA
AGGTGTCGATGGTTCGAGTCCATCACGGGACACCACTTTTATAATTTAATGCCCGGGTGG
TGAAATCGGTAGACACGCAGGACTTAAAATCCTGTGGCATAAAAGCCATGTCGGTTCAAG
TCCGACCCCGGGCACCACTAAAAAATTAATTAAATTGTTGGGGCTTTAGCTCAGCTGGGA
GAGCGCCTGCCTTGCACGCAGGAGGTCAGCGGTTCGATCCGCTAAGCTCCACCAGTTTAC
GATTTTAATCAAGTATCAAAAAGTTTCTAACCAATATATGGCGGTGTAGCTCAGCTGGCT
AGAGCGTGCGGTTCATACCCGCAAGGTCGAGGGTTCAAGTCCCCCCGCCGCTACCAGTAC
GGACCCGTAGCTCAGTTGGTTAGAGCTACCGGCTCATAACCGGTCGGTCGTTGGTTCGAG
TCCAACCGGGTCCACCAGTATTAACTTTTAAATAAACGTGGAGGAATACCCAAGAGGCTG
AAGGGATCGGTCTTGAAAACCGACAGGGTGTAAAAGCCGCGGGGGTTCAAATCCCTCTTC
CTCCGCCACTACAATTTTAAAACCAGTTATGGTATAATAACAACATATCGCGGGATAGAG
CAGTCTGGTAGCTCGTCGGGCTCATAACCCGAAGGTCGATGGTTCAAATCCATCTCCCGC
AACCAAATAAAATTTAAAACGGTCCGGTGGTGTAGGGGTTAACATGCCTGCCTGTCACGC
AGGAGATCGCGGGTTCAAATCCCGTCCGGACCGCCATTTTAGTGGCTCTGTAGCTCAGTC
GGTAGAGCAGTGGCCTGAAGAGCCTCGTGTCAGCCGGTTCGATTCCGCTTGGAGCCACCA
CTTATGAACTTAGTAAAGTCTGTTAAATACAGGCTTTTTTTATTGTTTAAGTATAGAGTT
TTGGTATAATAGGTTTAAAATAAAGGGGAAAGTTATGAAA
>ALTRNAARG A.laidlawii tRNA-Arg gene
TTCACAATTTGAGTCATAAAAGGCTTTACATTCAGCAGTGAATAGGTTATAATAGTCATT
GCTGAATAAGTGTGTCCGAATAGCTCAGCTGGATAGAGCAATAGCCTTCTAAGCTATCGG
TCGGGGGTTCGAATCCCTCTTCGGACGCCATCATGGAATTAAATAATGCTACACTTCGGT
GTGGCATTTTTTTATGCTTTTGAAGTATTGTTGATATATGTTTGAAGTAGCATTGAGAAG
GAAA
>ALTRNAASN A.laidlawii tRNA-Asn, 23S rRNA & 5SrRNA genes
CCAATTAGTAAGCCCCTCAAAGACTATGAGGTTGATAGGCCAGGTGTGTAAGTACAGCGA
TGTATTCAGCTTACTGGTACTAATAGGTCGAGGACTTAACCTTTCGTGATGATTCGCATC
ACATTCTTTTCAGAGAAAATCATTTATCTAGTTTTGAGAGCATTATGTCTGGTGACGATG
GCAAGATGTTCCACCTGTTCCCTACCCGAATATAGAAGTTAAGCATCTTAGCGCCGACGA
TAGTTATTACAATTAGCGAAAATAGGACGTTGCCAGGCATTTACTTTCTTAAATATTTTA
TATGTAGCTTAGTGTCACCCATGAATTATCAATTGTAAGCCTACTTAGCTCAGTTGGTTA
GAGCACCTGACTGTTAATCAGGGGGTCGCTGGTTCGAGTCCAGCAGTGGGCGCCACTTTT
TTTATAAATTAAAACTCACATGAACTGTGAGTTTTTTATTTGTCTAAATATGGAGTAATT
CATTGATTTTATCTAATAAATAAGGGTTAATTTCAAAGTTTCAACTAAAACTTCAACATT
CATGCGCAGATTCCTTATTT
>ALTRNAGLY A.laidlawii tRNA-Gly gene
TTTTACCAAAGATTTGTGGTATAATTCTTACAAATGAATAAGTTAGAAAAAAAGTAAATC
TTTTTTGAAATAAGGCTTGACAAGTATGGATGCATTTGATATCATAATAAAGCAACATGT
CGGTGTCGTATATGGTTATTACAGGCCTTGCCAAGGCTTAGACGGCAGTTCGATCCTGCT
CACCGGCTCCAGTAAATAATAAACAAGTTCTCAAGTAAATGCTTGAGGACTTTTTTTATT
TTCTTATAACTTAATTTGACTTAAAATTTCACCAACTTGA
>ALTRNAHGL A.laidlawii tRNA-His, tRNA-Gln & tRNA-Leu genes
ATCTTGCAATTACTTCCTTTCTTTTATATAATATCAAATATATCTTGTAAAGGATATAAA
AAACTTATTTTTCCGAAATTTTAACTTTACAAAATAAAATAAGTTGGTATAATAATACTT
GCGCAAAGTATGGCGGTTGTGGCGAAGTGGTTAACGCATCGGCTTGTGGCGCCGACACTC
GGGGGTTCAATTCCCCTCGGCCGCCCCATAATTGCGAATTAAATAAAATGTAGGCCCATA
GCCAAGCGGTAAGGCAACGGACTTTGACTCCGTCACTCGTAGGTTCAAATCCTGCTGGGC
CTGCCATTTTTATAAAGAAAGTCCACTATCCATGCGGGTGTGGCGAAATTGGCAGACGCA
CTAGACTTAGGATCTAGCGCTTTACGGCATGCAGGTTCAAGTCCTGTCACCCGCACCATA
TTGAATTGAAATGGTTTGATACAATTTGTATCAGACCTTTTTTGTATTTCGGGCGCTATT
TATCATTACG
>ALTRNALEU A.laidlawii tRNA-Leu gene
CTCAGATTTAATTGATAAAAATAACAAAAAATTGAATAAAAAGTAGGCTTATGCCTGCTT
ATATCCCCGTGTGGCGAAATGGTAGACGCGCTTGACTCAAAATCAAGTAGTGAAGACTGT
GCTGGTTCGAGTCCGGTCACGGGGACCAATCTTAAAGTTAAAGTTTTTCGTGTCCCAACA
AGGGATATGAAAAACTTTTGTTTTAGTAACAGATGTTTCTTTAAAAAAGAGGTATAACCA
ATGAAAAAACAAGTGATTCAACGTACAGAGACCATTGATT
>ALTRNALYS A.laidlawii tRNA-Lys gene
CTTTTATTTTTGTTTGACAAATAAGTAAACTTGATATACAATGATAATGCTTTATAAAAA
ATGCATCCATAGCTCAGTTGGTAGAGCAACAGACTCTTAATCTGTGGGTCCACGGTTCGA
GCCCGTGTGGGTGTACCATTTATATATGAAAAGGCCATTTGATGAACATATCAAATGGCC
TTATTTTTTCGTTATTAAAATCAATAATTATTTTACTCTCTTACCTTTTTCATAGTATTT
AGACGCCCATATAACTATAGATTATTTTCCCATCTTCTAT
>ALTRNASG A.laidlawii tRNA-Ser & tRNA-Glu genes
TACATCAAATTAGTGATTGACAACTAGAATAAAATAAGTTAATATAATAAGGCATGATTT
AATTCATGCCTACAATGGAGCGATACTCAAGAGGCCGAAGAGGGCGCACTGCTAACGCGT
TAGACGGTTAACCCCGTGCGAGGGTTCAAATCCCTCTCGCTCCGCCATTGGCCCGTTGGA
GAAACGGTTAACTCACATGCCTTTCACGCATGCATTCACGGGTTCGAATCCCGTACGGGT
CACCATTAATTATTGAAATACTCTTCTCACCCGTGGGAAGGTTTTTTTTCTAGACATTTA
TGTAGAGTTTTAAACTCATTAACAGATGTTCTAAGTTGAAAAAACTGAAT
>ALTRNW Acholeplasma laidlawii gene for tRNA-Trp
ATAATCTAATTTCTACACCTATAAGCGCTTATAACAGCCTAAAACACATAATTTAATAAA
ATTAAGTTTTCAATGATATGATTTGTTCATTTTCAATTGAAAACTTAATTTTGTTATGCT
ATACTAATAAGGCGTGAACGGGGGCATGGTGTCAACGGTAGCACACAGGTCTCCAAAACC
TTTAGTGTGGGTTCGAATCCTGCTGCCCTCGCCATCTAATAATAAGCAATGTTGCATACA
TGCAGCATTTTTTTTATACACTCGACTATATGCTCTTCTTGCAATAATAGTCTAATTAGG
TTTTTAGTCTAAATCATGCTATAATGAAAAAAGAAAGAGGTGAACAACTA
>ANLCAA Anacystis nidulans Leu-tRNA-CAA.
GGGCAAGTGGCGGAATTGGTAGACGCAGCAGACTCAAAATCTGCCGCTAGCGATAGTGTG
TGGGTTCGAGTCCCACCTTGCCCACCA
>ANMF Anacystis nidulans initiator Met-tRNA-f.
CGCGGGGTAGAGCAGCCTGGTAGCTCGTCGGGCTCATAACCCGAAGGTCAGAGGTTCAAA
TCCTCTCCCCGCCACCA
>BSG1 B.subtilis Gly-tRNA-1.
GCGGGTGTAGTTTAGTGGTAAAACCTCAGCCTTCCAAGCTGATGTCGTGAGTTCGATTCT
CATCACCCGCTCCA
>BSMF B.subtilis initiator Met-tRNA-f.
CGCGGGGTGGAGCAGTTCGGTAGCTCGTCGGGCTCATAACCCGAAGGTCGCAGGTTCAAA
TCCTGCCCCCGCAACCA
>BSMM B.subtilis Met-tRNA-m.
GGCGGTGTAGCTCAGCGGCTAGAGCGTACGGTTCATACCCGTGAGGTCGGGGGTTCGATC
CCCTCCGCCGCTACCA
>BSTRLC B.stearothermophilus Leu-tRNA-CAA.
GCCGATGTGGCGGAATTGGCAGACGCGCACGACTCAAAATCGTGTGGGCTTTGCCCGTGT
GGGTTCGACTCCCACCATCGGCACCA
>BSTRN Bacillus sp. strain PS3 genes for tRNA-Asn, tRNA-Ser, tRNA-Glu, tRNA-Val, tRNA-Met and tRNA-Asp.
GAATTCAAGAAGCAGTCTCGTTCCTTGAAAACTAGATAACCGATAAAGCAAAGGAAGAAG
CCGAGAGCGCGATAGGTTAAGCTGGAAAGGGCGCACGGTGGATGCCTTGGCACTAGGAGC
CGATGAAGGACGGGGCAAACGCCGAAACGCTTCGGGGAGCTGTAAGCAAGCGTTGATCCG
GAGATGTCCGAATGGGGGAACCCACTGTCCGTAATGGGGCAGTATCCATGCCTGAATCCA
TAGGGCGTGGAGGGCACACCCGGGGAACTGAAACATCTTAGTACCCGGAGAGAAGAAAGC
AACCGCGATTCCCTGAGTAGCGGCGAGCGAAACGGGAACAGCCCAAACCAAGAGGCGAGT
CCTCTTGGGGTTGTAGGACCGCTCACGATGGGAGTGAGAAAGGGACGGGGTAGACGAACC
GGTCTGGAACGGCCGGCCAGAGAAGGTGAGAGCCCTGTAGTCGAAACTTCGTTCCCTCCC
GAGCGGATCCTGAGTACGGCGGGACACGAGGAATCCCGTCGGAAGCAGGGAGGACCATCT
CCCAAGGCTAAATACTCCCTAGTGACCGATAGTGCACCAGTACCGTGAGGGAAAGGTGAA
AAGCACCCCGGGAGGGGAGTGAAAGAGAACCTGAAACCGTGTGCCTACAAGTAGTCAGAG
CCCGTTGATGGGTGATGGCGTGCCTTTTGTAGAATGAACCGGCGAGTGACGATGGCGTGC
GAGGTTAAGCCGAAGAGGCGGAGCCGCAGCGAAAGCGAGTCTGAACAGGGCGTGTGAGTA
CGTCGTCGTCGACCCGAAACCAGGTGATCTACCCATGTCCAGGGTGAAGGCCGGGTAACA
CCGGCTGGAGGCCGAACCCACGCACGTTGAAAAGTGCGGGGATGAGGTGTGGGTAGGGGT
GAAATGCCAATCGAACTTGGAGATAGCTGGTTCTCCCCGAAATAGCTTTAGGGCTAGCCT
CGGGTTTAGGAGTCTTGGAGGTAGAGCACTGATTGGGCTAGGGGCCAAACCGGGTTACCG
AACCCAGTCAAACTCCGAATGCCAATGACTTATGCCCGGGAGTCAGACTGCGAGTGATAA
GATCCGTGGTCGAGAGGGGAACAGCCCAGACCGCCAGCTAAGGCCCCGAAGTGCACGTTC
AGTGGAAAAGGATGTGGAGTTGCCGAGACAACCAGGATGTTGGCTTAGAAGCAGCCACCA
TTTAAAGAGTGCGTAATAGCTCACTGGTCGAGTGACTCTGCGCCGAAAATGTACCGGGGC
TAAACGTGCCGCCGAAGCTGCGGGATGACCGTTGGTCATCGGTAGGGGAGCGTTCTAAGG
GCAGAGAAGCCAGACCGGAAGACTGGTGGAGCGCTTAGAAGTGAGAATGCCGGTATGAGT
AGCGAAAACAGAGGTGAGAATCCTCTGCGCCGAAAGCCTAAGGGTTCCTGAGGAAGGTTC
GTCCGCTCAGGGTTAGTCGGGACCTAAGCCGAGGCCGAAACGTAGGTGATGGACAACAGG
TTGAGATTCCTGTACCACCTTCTTCCCGTTTGAGCGATGGGGGGACGCAGGAGGATAGGG
CGAGCAGGCGGCTGGAAGAGCCTGTCCAAGCCGTGACGTGATCCGCAGGCAAATCCGCGG
ATCATAAGGCCAAGCGGTGACGGCGACGGAGTATCCGGAAGTCCCCGATTTCACACTGCC
AAGAAAAGCCTCTAGCGAGGGAAGAGGTGCCCGTACCGCAAACCGACACAGGTAGGCGAG
GAGAGAATCCTAAGGCGCGCGGGAGAACTCTCGTTAAGGAACTCGGCAAAATGACCCCGT
AACTTCGGGAGAAGGGGTGCTCTTTTGGGTGAAGAGCCCTGAAGAGCCGCAGTGAAAAGG
CCCAAGCGACTGTTTATCAAAAACACAGGTCTCTGCGAAGCCGAAAGGCGACGTATAGGG
GCTGACACCTGCCCGGTGCTGGAAGGTTAAGGGGAGCGCTTAGCGGAAGCGAAGGTGCGA
ACCGAAGCCCCAGTAAACGGCGGCCGTAACTATAACGGTCCTAAGGTAGCGAAATTCCTT
GTCGGGTAAGTTCCGACCCGCACGAAAGGTGTAACGACTTGGGCGCTGTCTCAACGAGAG
ACCCGGTGAAATTATACTACCTGTGAAGATGCAGGTTACCCGCGACAGGACGGAAAGACC
CCGTGGAGCTTTACTGCAGCCTGATATGGAATTTTGGTATCGCTTGTACAGGATAGGTGG
GAGCCTGGGAAGCCGGAGCGCCAGCTTCGGTGGAGGCGGCGGTGGGATACCACCCTGGCG
GTATTGAAATTCTAACCCGCACCCCTTAGCGGGGTGGGAGACAGTGTCAGGTGGGCAGTT
TGACTGGGGCGGTCGCCTCCCAAAAGGTAACGGAGGCGCCCAAAGGTTCCCTCAGAATGG
TTGGAAATCATTCGGAGAGTGCAAAGGCACAAGGGAGCTTGACTGCGAGACGGACAGGTC
GAGCAGGGACGAAAGTCGGGCTTAGTGATCCGGTGGTTCCGCATGGAAGGGCCATCGCTC
AACGGATAAAAGCTACCCCGGGGATAACAGGCTGATCTCCCCCAAGAGTCCACATCGACG
GGGAGGTTTGGCACCTCGATGTCGGCTCATCGCATCCTGGGGCTGTAGTCGGTCCCAAGG
GTTGGGCTGTTCGCCATTAAAGCGGTACGAGCTGGGTTCAGAACGTCGTGAGACAGTTCG
GTCCCTATCCGTCGCGGGCGGAGGAAATTTGAGAGGAGCTGTCCTTAGTACGAGAGGACC
GGGATGGACGCACCGCTGGTGTACCAGTTGTCCCGCCAGCACCGCTGGGTAGCTATGTGC
GGACGGGATAAGCGCTGAAAGCATCTAAGCGTGAAGCCCCCCTCAAGATGAGATTTCCCA
CCGCGCAAACGGTAAGATCCCTCGAAGATGACGAGGTCGATAGGTCCGAGGTGGAAGCGT
GGTGACACGTGGAGCTGACGGATACTAATCGATCGAGGGCTTAACCAAGAAAAGCGCAGG
CGAGCGGCTTCTTCCAACGGTTATCTAGTTTTGAAGGAATGAAAAAACTCTTGACAGCTC
AGTGACGAGCATTTGCATCATCGGATGAACTGCGAGTTGCCTCGACGCATCCAGCTTCTT
TGAATCGGCTGGCAGAGGAAGAGGCAGGACATCTTGTTGGGAGAATTGACCGTCGTTATG
ATCCGCAGTAGCTCAGTGGTAGAGCAATCGGCTGTTAACCGATTGGTCGCAGGTTCGAAT
CCTGCCTGCGGAGCCATCGTGGAGAGCTGTCCGAGTGGTCGAAGGAGCACGATTGGAAAT
CGTGTAGGCGTGAATAGCGCCTCAAGGGTTCGAATCCCTTGCTCTCCGCCATGATCCATC
AGCATGGCCCGTTGGTCAAGTGGTTAAGACACCGCCCTTTCACGGCGGTAACACGGGTTC
GAATCCGTACGGGTCACTTCTTTGTGGAGGATTAGCTCAGCTGGGAGAGCACTTGCCTTA
CAAGCAAGGGGTCGGCGGTTCGATCCCGTCATCCTCCACCATCTTAATATAAAAGCTGAC
TTGCTTTCTCATTCATCGCGAGTTGTCCCGACGCATCCAGCATCTTTGAATCAACTTGTA
GAGGAAGAGGCAACGGACAACAACACCAAAAGCATTGTGAATATCAACTTACTTTCTTAT
CATCGCGGGGTGGGGCAACGAGCCTTTGCTTCATCGAATCCGCTTCGAGTTGCCTCGACG
CACCGAGCATCTTTGAATCAGCTGGCAGAGGAAGAGGCAATAAATATTTAAACCGATACT
CAGTGTATGATCGAAATAAGTTCATTTTCTATCGTCGCGGGGTGGAGCAGTCCGGTAGCT
CGTCGGGCTCATAACCCGAAGGTCGCAGGTTCAAATCCTGCCCCCGCAACCAAAATTGGT
CCCGTAGTGTAGTGGTTAACATGCCTGCCTGTCACGCAGGAGATCGCGGGTTCGATGCCG
TCGGGACCGCCATTCTTTCAAAATTGTGAAAGATGAAAAATACGGCTCAGCAGCTCATTC
GGTAGAGACGAGATCCGCTTCATCGAATTC
>BSTRNA1 Bacillus subtilis genes for tRNA-Lys(UUU), tRNA-Glu (UUC), tRNA-Asp (GUC) and tRNA-Phe (GAA)
AAAAAAGTTATTGCCACTTCTATTTGTTCGTGATATTATAAATCTCGTTGTTACGGAAAC
TGCTTCAATAGAGTACAAGATGAGAACTAGATTTAAGTCGTTTGCTCTATAGAAATTCCG
ACATCTTTATGAGCCATTAGCTCAGTTGGTAGAGCATCTGACTTTTAATCAGAGGGTCGA
AGGTTCGAGTCCTTCATGGCTCACCATTTCGTGAAGGCCCGTTGGTCAAGCGGTTAAGAC
ACCGCCCTTTCACGGCGGTAACACGGGTTCGAATCCCGTACGGGTCATTGATTTACTTTA
GCGTTATTGCTAAATTCCTTATTTGTCTGTGAGAGCTGACACGACAGCTCTCCGGGCAAT
TACTGTAAGGTCCGGTAGTTCAGTTGGTTAGAATGCCTGCCTGTCACGCAGGAGGTCGCG
GGTTCGAGTCCCGTCCGGACCGCCATTTTACTTTACTGTGGAAAATAAAACATTTGGCTC
GGTAGCTCAGTTGGTAGAGCAACGGACTGAAAATCCGTGTGTCGGCGGTTCGATTCCGTC
CCGAGCCACTTACCAAACGCATCTGCAATCGTAGGTGCGTTTTTTCTTTTAGGAAAAAGG
CAAACATGAGGAGTGTTATAATAGAAGAAAAAGGGAGAACCGGCCCTGCGGCCGGTTCAA
AGAAGAAGACGTCATTGATAAAGACGCACTCCGGTGAGGGGAGGTTTCAATAAAGTTATC
TTTTTTAAAAAAAGT
>BSV1 B.subtilis Val-tRNA-1.
GGAGGATTAGCTCAGCTGGGAGAGCATCTGCCTTACAAGCAGAGGGTCGGCGGTTCGAGC
CCGTCATCCTCCACCA

View File

@ -0,0 +1,233 @@
>AHTRNRHL A. hydrophila DNA for tRNA-Arg, tRNA-His, tRNA-Leu and tRNA-Pro
CGATGGAAAAACAAGCGGTTGATTCTGCGAGATAAAAAAATAGTAGACAA
GGTGGGTCCCCATCATTAATATTGCGCCCCGTTCGACAGCGTAGCGCCCG
TAGCTCAGCTGGATAGAGCGCTGCCCTCCGGAGGCAGAGGTCACAGGTTC
GAATCCTGTCGGGCGCACCATCAAAAGTGCGCCGGTTAAGCGGGCGAGTT
GAAGAACGAAAACAGCTGTGGTGGCTGTAGCTCAGTTGGTAGAGTCCCGG
ATTGTGATTCCGGTTGTCGTGGGTTCGAGCCCCATCAGCCACCCCATTTT
ACAGCTTGTCAGGTATGCGAAGGTGGCGGAATTGGTAGACGCGCTAGCTT
CAGGTGTTAGTGCCCCCCGGGTGTGAGGGTTCGAGTCCCTCTCTTCGCAC
CATACTTGCTGTATGACGAGGATGACCGTTCATGGTCGTCTTTGTGTTTT
GAAAAGAAAGCTTTTTTAAAGCCTCGGTGATTAGCGCAGCCCGGTAGCGC
ATCTGGTTTGGGACCAGAGGGTCAAAGGTTCGAATCCTTTATCACCGACC
ACATTCTGAAAAACCCCGCTCAGGCGGGGTTTTTTGCTTTCTGTCGTCTG
CCAGCCTGCTGCCGCAGGTTT
>ALTRNA11 A.laidlawii 23S rRNA, 5S rRNA, tRNA-Val, tRNA-Thr, tRNA-Lys, tRNA-Leu, tRNA-Ala, tRNA-Met (elongator), tRNA-Ile, tRNA-Ser, tRNA-Met (initiator), tRNA-Asp & tRNA-Phe genes
CAGTACTACAGCTGGGTAGCTATGTGCGGAAGGGATAAACGCTGAAAGCA
TCTAAGCGTGAAGCCCCCCTTAAGATGAGATTTCCCAATTAGTAAGACCC
CTCAAAGACTATGAGGTTGATAGGCCAGGTGTGTAAGTACAGCGATGTAT
TCAGCTTACTGGTACTAATAGGTCGAGGACTTAACCTTTCGTGATGATTC
GCATCACATTCTTTTCAGAGAAAATCATTTATCTAGTTTTGAGAGCATTA
TGTCTGGTGACGATGGCAAGATGGTTCCACCTGTTCCCATCCCGAACACA
GAAGTTAAGCATCTTAGCGCCGACGATAGTTATTACAATTAGCGAAAATA
GGACGTTGCCAGGCATATCATCTCAATTTTATACCATTTTTTGGAGGATT
AGCTCAGTTGGGAGAGCACCTGCCTTACAAGCAGGGGGTCGGCGGTTCAA
GCCCGTCATCCTCCACCACTTTACAAGCCGAAATAGCTCAATCGGTAGAG
CAACTGATTTGTAATCAGTAGGTTGCGGGTTCAATTCCTGTTTTCGGCAC
CACTACAATGTCCCGTTAGCTCAGGTGGTAGAGCACTTGACTTTTAATCA
AGGTGTCGATGGTTCGAGTCCATCACGGGACACCACTTTTATAATTTAAT
GCCCGGGTGGTGAAATCGGTAGACACGCAGGACTTAAAATCCTGTGGCAT
AAAAGCCATGTCGGTTCAAGTCCGACCCCGGGCACCACTAAAAAATTAAT
TAAATTGTTGGGGCTTTAGCTCAGCTGGGAGAGCGCCTGCCTTGCACGCA
GGAGGTCAGCGGTTCGATCCGCTAAGCTCCACCAGTTTACGATTTTAATC
AAGTATCAAAAAGTTTCTAACCAATATATGGCGGTGTAGCTCAGCTGGCT
AGAGCGTGCGGTTCATACCCGCAAGGTCGAGGGTTCAAGTCCCCCCGCCG
CTACCAGTACGGACCCGTAGCTCAGTTGGTTAGAGCTACCGGCTCATAAC
CGGTCGGTCGTTGGTTCGAGTCCAACCGGGTCCACCAGTATTAACTTTTA
AATAAACGTGGAGGAATACCCAAGAGGCTGAAGGGATCGGTCTTGAAAAC
CGACAGGGTGTAAAAGCCGCGGGGGTTCAAATCCCTCTTCCTCCGCCACT
ACAATTTTAAAACCAGTTATGGTATAATAACAACATATCGCGGGATAGAG
CAGTCTGGTAGCTCGTCGGGCTCATAACCCGAAGGTCGATGGTTCAAATC
CATCTCCCGCAACCAAATAAAATTTAAAACGGTCCGGTGGTGTAGGGGTT
AACATGCCTGCCTGTCACGCAGGAGATCGCGGGTTCAAATCCCGTCCGGA
CCGCCATTTTAGTGGCTCTGTAGCTCAGTCGGTAGAGCAGTGGCCTGAAG
AGCCTCGTGTCAGCCGGTTCGATTCCGCTTGGAGCCACCACTTATGAACT
TAGTAAAGTCTGTTAAATACAGGCTTTTTTTATTGTTTAAGTATAGAGTT
TTGGTATAATAGGTTTAAAATAAAGGGGAAAGTTATGAAA
>ALTRNAARG A.laidlawii tRNA-Arg gene
TTCACAATTTGAGTCATAAAAGGCTTTACATTCAGCAGTGAATAGGTTAT
AATAGTCATTGCTGAATAAGTGTGTCCGAATAGCTCAGCTGGATAGAGCA
ATAGCCTTCTAAGCTATCGGTCGGGGGTTCGAATCCCTCTTCGGACGCCA
TCATGGAATTAAATAATGCTACACTTCGGTGTGGCATTTTTTTATGCTTT
TGAAGTATTGTTGATATATGTTTGAAGTAGCATTGAGAAGGAAA
>ALTRNAASN A.laidlawii tRNA-Asn, 23S rRNA & 5SrRNA genes
CCAATTAGTAAGCCCCTCAAAGACTATGAGGTTGATAGGCCAGGTGTGTA
AGTACAGCGATGTATTCAGCTTACTGGTACTAATAGGTCGAGGACTTAAC
CTTTCGTGATGATTCGCATCACATTCTTTTCAGAGAAAATCATTTATCTA
GTTTTGAGAGCATTATGTCTGGTGACGATGGCAAGATGTTCCACCTGTTC
CCTACCCGAATATAGAAGTTAAGCATCTTAGCGCCGACGATAGTTATTAC
AATTAGCGAAAATAGGACGTTGCCAGGCATTTACTTTCTTAAATATTTTA
TATGTAGCTTAGTGTCACCCATGAATTATCAATTGTAAGCCTACTTAGCT
CAGTTGGTTAGAGCACCTGACTGTTAATCAGGGGGTCGCTGGTTCGAGTC
CAGCAGTGGGCGCCACTTTTTTTATAAATTAAAACTCACATGAACTGTGA
GTTTTTTATTTGTCTAAATATGGAGTAATTCATTGATTTTATCTAATAAA
TAAGGGTTAATTTCAAAGTTTCAACTAAAACTTCAACATTCATGCGCAGA
TTCCTTATTT
>ALTRNAGLY A.laidlawii tRNA-Gly gene
TTTTACCAAAGATTTGTGGTATAATTCTTACAAATGAATAAGTTAGAAAA
AAAGTAAATCTTTTTTGAAATAAGGCTTGACAAGTATGGATGCATTTGAT
ATCATAATAAAGCAACATGTCGGTGTCGTATATGGTTATTACAGGCCTTG
CCAAGGCTTAGACGGCAGTTCGATCCTGCTCACCGGCTCCAGTAAATAAT
AAACAAGTTCTCAAGTAAATGCTTGAGGACTTTTTTTATTTTCTTATAAC
TTAATTTGACTTAAAATTTCACCAACTTGA
>ALTRNAHGL A.laidlawii tRNA-His, tRNA-Gln & tRNA-Leu genes
ATCTTGCAATTACTTCCTTTCTTTTATATAATATCAAATATATCTTGTAA
AGGATATAAAAAACTTATTTTTCCGAAATTTTAACTTTACAAAATAAAAT
AAGTTGGTATAATAATACTTGCGCAAAGTATGGCGGTTGTGGCGAAGTGG
TTAACGCATCGGCTTGTGGCGCCGACACTCGGGGGTTCAATTCCCCTCGG
CCGCCCCATAATTGCGAATTAAATAAAATGTAGGCCCATAGCCAAGCGGT
AAGGCAACGGACTTTGACTCCGTCACTCGTAGGTTCAAATCCTGCTGGGC
CTGCCATTTTTATAAAGAAAGTCCACTATCCATGCGGGTGTGGCGAAATT
GGCAGACGCACTAGACTTAGGATCTAGCGCTTTACGGCATGCAGGTTCAA
GTCCTGTCACCCGCACCATATTGAATTGAAATGGTTTGATACAATTTGTA
TCAGACCTTTTTTGTATTTCGGGCGCTATTTATCATTACG
>ALTRNALEU A.laidlawii tRNA-Leu gene
CTCAGATTTAATTGATAAAAATAACAAAAAATTGAATAAAAAGTAGGCTT
ATGCCTGCTTATATCCCCGTGTGGCGAAATGGTAGACGCGCTTGACTCAA
AATCAAGTAGTGAAGACTGTGCTGGTTCGAGTCCGGTCACGGGGACCAAT
CTTAAAGTTAAAGTTTTTCGTGTCCCAACAAGGGATATGAAAAACTTTTG
TTTTAGTAACAGATGTTTCTTTAAAAAAGAGGTATAACCAATGAAAAAAC
AAGTGATTCAACGTACAGAGACCATTGATT
>ALTRNALYS A.laidlawii tRNA-Lys gene
CTTTTATTTTTGTTTGACAAATAAGTAAACTTGATATACAATGATAATGC
TTTATAAAAAATGCATCCATAGCTCAGTTGGTAGAGCAACAGACTCTTAA
TCTGTGGGTCCACGGTTCGAGCCCGTGTGGGTGTACCATTTATATATGAA
AAGGCCATTTGATGAACATATCAAATGGCCTTATTTTTTCGTTATTAAAA
TCAATAATTATTTTACTCTCTTACCTTTTTCATAGTATTTAGACGCCCAT
ATAACTATAGATTATTTTCCCATCTTCTAT
>ALTRNASG A.laidlawii tRNA-Ser & tRNA-Glu genes
TACATCAAATTAGTGATTGACAACTAGAATAAAATAAGTTAATATAATAA
GGCATGATTTAATTCATGCCTACAATGGAGCGATACTCAAGAGGCCGAAG
AGGGCGCACTGCTAACGCGTTAGACGGTTAACCCCGTGCGAGGGTTCAAA
TCCCTCTCGCTCCGCCATTGGCCCGTTGGAGAAACGGTTAACTCACATGC
CTTTCACGCATGCATTCACGGGTTCGAATCCCGTACGGGTCACCATTAAT
TATTGAAATACTCTTCTCACCCGTGGGAAGGTTTTTTTTCTAGACATTTA
TGTAGAGTTTTAAACTCATTAACAGATGTTCTAAGTTGAAAAAACTGAAT
>ALTRNW Acholeplasma laidlawii gene for tRNA-Trp
ATAATCTAATTTCTACACCTATAAGCGCTTATAACAGCCTAAAACACATA
ATTTAATAAAATTAAGTTTTCAATGATATGATTTGTTCATTTTCAATTGA
AAACTTAATTTTGTTATGCTATACTAATAAGGCGTGAACGGGGGCATGGT
GTCAACGGTAGCACACAGGTCTCCAAAACCTTTAGTGTGGGTTCGAATCC
TGCTGCCCTCGCCATCTAATAATAAGCAATGTTGCATACATGCAGCATTT
TTTTTATACACTCGACTATATGCTCTTCTTGCAATAATAGTCTAATTAGG
TTTTTAGTCTAAATCATGCTATAATGAAAAAAGAAAGAGGTGAACAACTA
>ANLCAA Anacystis nidulans Leu-tRNA-CAA.
GGGCAAGTGGCGGAATTGGTAGACGCAGCAGACTCAAAATCTGCCGCTAG
CGATAGTGTGTGGGTTCGAGTCCCACCTTGCCCACCA
>ANMF Anacystis nidulans initiator Met-tRNA-f.
CGCGGGGTAGAGCAGCCTGGTAGCTCGTCGGGCTCATAACCCGAAGGTCA
GAGGTTCAAATCCTCTCCCCGCCACCA
>BSG1 B.subtilis Gly-tRNA-1.
GCGGGTGTAGTTTAGTGGTAAAACCTCAGCCTTCCAAGCTGATGTCGTGA
GTTCGATTCTCATCACCCGCTCCA
>BSMF B.subtilis initiator Met-tRNA-f.
CGCGGGGTGGAGCAGTTCGGTAGCTCGTCGGGCTCATAACCCGAAGGTCG
CAGGTTCAAATCCTGCCCCCGCAACCA
>BSMM B.subtilis Met-tRNA-m.
GGCGGTGTAGCTCAGCGGCTAGAGCGTACGGTTCATACCCGTGAGGTCGG
GGGTTCGATCCCCTCCGCCGCTACCA
>BSTRLC B.stearothermophilus Leu-tRNA-CAA.
GCCGATGTGGCGGAATTGGCAGACGCGCACGACTCAAAATCGTGTGGGCT
TTGCCCGTGTGGGTTCGACTCCCACCATCGGCACCA
>BSTRN Bacillus sp. strain PS3 genes for tRNA-Asn, tRNA-Ser, tRNA-Glu, tRNA-Val, tRNA-Met and tRNA-Asp.
GAATTCAAGAAGCAGTCTCGTTCCTTGAAAACTAGATAACCGATAAAGCA
AAGGAAGAAGCCGAGAGCGCGATAGGTTAAGCTGGAAAGGGCGCACGGTG
GATGCCTTGGCACTAGGAGCCGATGAAGGACGGGGCAAACGCCGAAACGC
TTCGGGGAGCTGTAAGCAAGCGTTGATCCGGAGATGTCCGAATGGGGGAA
CCCACTGTCCGTAATGGGGCAGTATCCATGCCTGAATCCATAGGGCGTGG
AGGGCACACCCGGGGAACTGAAACATCTTAGTACCCGGAGAGAAGAAAGC
AACCGCGATTCCCTGAGTAGCGGCGAGCGAAACGGGAACAGCCCAAACCA
AGAGGCGAGTCCTCTTGGGGTTGTAGGACCGCTCACGATGGGAGTGAGAA
AGGGACGGGGTAGACGAACCGGTCTGGAACGGCCGGCCAGAGAAGGTGAG
AGCCCTGTAGTCGAAACTTCGTTCCCTCCCGAGCGGATCCTGAGTACGGC
GGGACACGAGGAATCCCGTCGGAAGCAGGGAGGACCATCTCCCAAGGCTA
AATACTCCCTAGTGACCGATAGTGCACCAGTACCGTGAGGGAAAGGTGAA
AAGCACCCCGGGAGGGGAGTGAAAGAGAACCTGAAACCGTGTGCCTACAA
GTAGTCAGAGCCCGTTGATGGGTGATGGCGTGCCTTTTGTAGAATGAACC
GGCGAGTGACGATGGCGTGCGAGGTTAAGCCGAAGAGGCGGAGCCGCAGC
GAAAGCGAGTCTGAACAGGGCGTGTGAGTACGTCGTCGTCGACCCGAAAC
CAGGTGATCTACCCATGTCCAGGGTGAAGGCCGGGTAACACCGGCTGGAG
GCCGAACCCACGCACGTTGAAAAGTGCGGGGATGAGGTGTGGGTAGGGGT
GAAATGCCAATCGAACTTGGAGATAGCTGGTTCTCCCCGAAATAGCTTTA
GGGCTAGCCTCGGGTTTAGGAGTCTTGGAGGTAGAGCACTGATTGGGCTA
GGGGCCAAACCGGGTTACCGAACCCAGTCAAACTCCGAATGCCAATGACT
TATGCCCGGGAGTCAGACTGCGAGTGATAAGATCCGTGGTCGAGAGGGGA
ACAGCCCAGACCGCCAGCTAAGGCCCCGAAGTGCACGTTCAGTGGAAAAG
GATGTGGAGTTGCCGAGACAACCAGGATGTTGGCTTAGAAGCAGCCACCA
TTTAAAGAGTGCGTAATAGCTCACTGGTCGAGTGACTCTGCGCCGAAAAT
GTACCGGGGCTAAACGTGCCGCCGAAGCTGCGGGATGACCGTTGGTCATC
GGTAGGGGAGCGTTCTAAGGGCAGAGAAGCCAGACCGGAAGACTGGTGGA
GCGCTTAGAAGTGAGAATGCCGGTATGAGTAGCGAAAACAGAGGTGAGAA
TCCTCTGCGCCGAAAGCCTAAGGGTTCCTGAGGAAGGTTCGTCCGCTCAG
GGTTAGTCGGGACCTAAGCCGAGGCCGAAACGTAGGTGATGGACAACAGG
TTGAGATTCCTGTACCACCTTCTTCCCGTTTGAGCGATGGGGGGACGCAG
GAGGATAGGGCGAGCAGGCGGCTGGAAGAGCCTGTCCAAGCCGTGACGTG
ATCCGCAGGCAAATCCGCGGATCATAAGGCCAAGCGGTGACGGCGACGGA
GTATCCGGAAGTCCCCGATTTCACACTGCCAAGAAAAGCCTCTAGCGAGG
GAAGAGGTGCCCGTACCGCAAACCGACACAGGTAGGCGAGGAGAGAATCC
TAAGGCGCGCGGGAGAACTCTCGTTAAGGAACTCGGCAAAATGACCCCGT
AACTTCGGGAGAAGGGGTGCTCTTTTGGGTGAAGAGCCCTGAAGAGCCGC
AGTGAAAAGGCCCAAGCGACTGTTTATCAAAAACACAGGTCTCTGCGAAG
CCGAAAGGCGACGTATAGGGGCTGACACCTGCCCGGTGCTGGAAGGTTAA
GGGGAGCGCTTAGCGGAAGCGAAGGTGCGAACCGAAGCCCCAGTAAACGG
CGGCCGTAACTATAACGGTCCTAAGGTAGCGAAATTCCTTGTCGGGTAAG
TTCCGACCCGCACGAAAGGTGTAACGACTTGGGCGCTGTCTCAACGAGAG
ACCCGGTGAAATTATACTACCTGTGAAGATGCAGGTTACCCGCGACAGGA
CGGAAAGACCCCGTGGAGCTTTACTGCAGCCTGATATGGAATTTTGGTAT
CGCTTGTACAGGATAGGTGGGAGCCTGGGAAGCCGGAGCGCCAGCTTCGG
TGGAGGCGGCGGTGGGATACCACCCTGGCGGTATTGAAATTCTAACCCGC
ACCCCTTAGCGGGGTGGGAGACAGTGTCAGGTGGGCAGTTTGACTGGGGC
GGTCGCCTCCCAAAAGGTAACGGAGGCGCCCAAAGGTTCCCTCAGAATGG
TTGGAAATCATTCGGAGAGTGCAAAGGCACAAGGGAGCTTGACTGCGAGA
CGGACAGGTCGAGCAGGGACGAAAGTCGGGCTTAGTGATCCGGTGGTTCC
GCATGGAAGGGCCATCGCTCAACGGATAAAAGCTACCCCGGGGATAACAG
GCTGATCTCCCCCAAGAGTCCACATCGACGGGGAGGTTTGGCACCTCGAT
GTCGGCTCATCGCATCCTGGGGCTGTAGTCGGTCCCAAGGGTTGGGCTGT
TCGCCATTAAAGCGGTACGAGCTGGGTTCAGAACGTCGTGAGACAGTTCG
GTCCCTATCCGTCGCGGGCGGAGGAAATTTGAGAGGAGCTGTCCTTAGTA
CGAGAGGACCGGGATGGACGCACCGCTGGTGTACCAGTTGTCCCGCCAGC
ACCGCTGGGTAGCTATGTGCGGACGGGATAAGCGCTGAAAGCATCTAAGC
GTGAAGCCCCCCTCAAGATGAGATTTCCCACCGCGCAAACGGTAAGATCC
CTCGAAGATGACGAGGTCGATAGGTCCGAGGTGGAAGCGTGGTGACACGT
GGAGCTGACGGATACTAATCGATCGAGGGCTTAACCAAGAAAAGCGCAGG
CGAGCGGCTTCTTCCAACGGTTATCTAGTTTTGAAGGAATGAAAAAACTC
TTGACAGCTCAGTGACGAGCATTTGCATCATCGGATGAACTGCGAGTTGC
CTCGACGCATCCAGCTTCTTTGAATCGGCTGGCAGAGGAAGAGGCAGGAC
ATCTTGTTGGGAGAATTGACCGTCGTTATGATCCGCAGTAGCTCAGTGGT
AGAGCAATCGGCTGTTAACCGATTGGTCGCAGGTTCGAATCCTGCCTGCG
GAGCCATCGTGGAGAGCTGTCCGAGTGGTCGAAGGAGCACGATTGGAAAT
CGTGTAGGCGTGAATAGCGCCTCAAGGGTTCGAATCCCTTGCTCTCCGCC
ATGATCCATCAGCATGGCCCGTTGGTCAAGTGGTTAAGACACCGCCCTTT
CACGGCGGTAACACGGGTTCGAATCCGTACGGGTCACTTCTTTGTGGAGG
ATTAGCTCAGCTGGGAGAGCACTTGCCTTACAAGCAAGGGGTCGGCGGTT
CGATCCCGTCATCCTCCACCATCTTAATATAAAAGCTGACTTGCTTTCTC
ATTCATCGCGAGTTGTCCCGACGCATCCAGCATCTTTGAATCAACTTGTA
GAGGAAGAGGCAACGGACAACAACACCAAAAGCATTGTGAATATCAACTT
ACTTTCTTATCATCGCGGGGTGGGGCAACGAGCCTTTGCTTCATCGAATC
CGCTTCGAGTTGCCTCGACGCACCGAGCATCTTTGAATCAGCTGGCAGAG
GAAGAGGCAATAAATATTTAAACCGATACTCAGTGTATGATCGAAATAAG
TTCATTTTCTATCGTCGCGGGGTGGAGCAGTCCGGTAGCTCGTCGGGCTC
ATAACCCGAAGGTCGCAGGTTCAAATCCTGCCCCCGCAACCAAAATTGGT
CCCGTAGTGTAGTGGTTAACATGCCTGCCTGTCACGCAGGAGATCGCGGG
TTCGATGCCGTCGGGACCGCCATTCTTTCAAAATTGTGAAAGATGAAAAA
TACGGCTCAGCAGCTCATTCGGTAGAGACGAGATCCGCTTCATCGAATTC
>BSTRNA1 Bacillus subtilis genes for tRNA-Lys(UUU), tRNA-Glu (UUC), tRNA-Asp (GUC) and tRNA-Phe (GAA)
AAAAAAGTTATTGCCACTTCTATTTGTTCGTGATATTATAAATCTCGTTG
TTACGGAAACTGCTTCAATAGAGTACAAGATGAGAACTAGATTTAAGTCG
TTTGCTCTATAGAAATTCCGACATCTTTATGAGCCATTAGCTCAGTTGGT
AGAGCATCTGACTTTTAATCAGAGGGTCGAAGGTTCGAGTCCTTCATGGC
TCACCATTTCGTGAAGGCCCGTTGGTCAAGCGGTTAAGACACCGCCCTTT
CACGGCGGTAACACGGGTTCGAATCCCGTACGGGTCATTGATTTACTTTA
GCGTTATTGCTAAATTCCTTATTTGTCTGTGAGAGCTGACACGACAGCTC
TCCGGGCAATTACTGTAAGGTCCGGTAGTTCAGTTGGTTAGAATGCCTGC
CTGTCACGCAGGAGGTCGCGGGTTCGAGTCCCGTCCGGACCGCCATTTTA
CTTTACTGTGGAAAATAAAACATTTGGCTCGGTAGCTCAGTTGGTAGAGC
AACGGACTGAAAATCCGTGTGTCGGCGGTTCGATTCCGTCCCGAGCCACT
TACCAAACGCATCTGCAATCGTAGGTGCGTTTTTTCTTTTAGGAAAAAGG
CAAACATGAGGAGTGTTATAATAGAAGAAAAAGGGAGAACCGGCCCTGCG
GCCGGTTCAAAGAAGAAGACGTCATTGATAAAGACGCACTCCGGTGAGGG
GAGGTTTCAATAAAGTTATCTTTTTTAAAAAAAGT
>BSV1 B.subtilis Val-tRNA-1.
GGAGGATTAGCTCAGCTGGGAGAGCATCTGCCTTACAAGCAGAGGGTCGG
CGGTTCGAGCCCGTCATCCTCCACCA
total seq = 19

View File

@ -0,0 +1,52 @@
/* ---------------- */
/* @file: ctest.c */
/* ---------------- */
#include <stdio.h>
#include <stdlib.h>
#include "libfasta.h"
/* ----------------------------------------------- */
/* Round-trip test driver: read FASTA sequences from stdin and     */
/* write each one back to stdout with FASTA_CHAR_PER_LINE symbols  */
/* per line, then print the number of sequences read.              */
/* "error" is printed for any sequence whose ok flag is cleared;   */
/* a running count is printed whenever nbseq % 1000 == 999.        */
/* Defined with a prototype and an explicit int return type (the   */
/* former K&R form relied on implicit int, invalid since C99).     */
int main(int argn, char *argv[])
{
    int           nbseq;
    FastaSequence *seq;

    (void) argn;                    /* command line is not used */
    (void) argv;

    seq   = NewFastaSequence();
    nbseq = 0;

    while (ReadFastaSequence(stdin, seq)) {

        if (! seq->ok)
            printf("error\n");

        nbseq++;

        /* progress indicator, rewritten in place with '\r' */
        if (nbseq % 1000 == 999) {
            printf("\r%d", nbseq);
            fflush(stdout);
        }

        WriteFastaSequence(stdout, seq, FASTA_CHAR_PER_LINE);
    }

    FreeFastaSequence(seq);

    printf("total seq = %d\n", nbseq);

    exit(0);
}

View File

@ -0,0 +1,27 @@
# ---------------------------------------------------------------
# $Id: $
# ---------------------------------------------------------------
# @file: Makefile
# @desc: makefile for sequtils/src
#
# @history:
# @history:
# @+ <Gloup> : Apr 97 : Created
# @+ <Gloup> : Mar 02 : Updated for LXxware
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
DIRS = Abisrc \
Biosrc \
Fastasrc \
Utilsrc
include ../config/targets/propagate.targ
include ../config/targets/help.targ

BIN
src/sequtils/lxpack/src/Utilsrc/.DS_Store vendored Executable file

Binary file not shown.

View File

@ -0,0 +1,47 @@
# ---------------------------------------------------------------
# @pckg: SeqUtils / V1.0 / Gloup Jan 92
#
# @file: Makefile
# @desc: makefile for Utilsrc / Biological utilities
#
# @history:
# @+ <Gloup> : Jan 96 : Created
# @+ <Gloup> : Jul 93 : revised version
# @+ <Gloup> : Feb 01 : Adapted to helixware
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
include ../../config/auto.conf
#
# Machine independent flags
#
PROGS = util_complinv \
util_cut \
util_translate \
util_tab_aa \
util_tab_codon \
util_tab_dinuc \
util_tab_nuc \
util_skew \
util_skew_teta \
util_skew_chi_mono \
util_skew_mono \
util_codon_skew \
util_codon_skew_corr
OSRC = $(PROGS:=.c)
include ../../config/targets/lxbin.targ
include ../../config/targets/help.targ
INCDIR = ../../include
LIBS = -lfasta -lbio -laabi

View File

@ -0,0 +1,351 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_tab_codon.c */
/* @desc: tabulate codon usage */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "Genetic.h"
#include "libbio.h"
#include "libfasta.h"
/* larger of two values; each argument may be evaluated twice */
#ifndef Max
#define Max(i, j) ((i) > (j) ? (i) : (j))
#endif
/* 64 nucleotide triplets plus one extra slot (index X_CODON);  */
/* presumably reserved for unrecognised codons - TODO confirm   */
#define NB_CODONS 65
#define X_CODON (NB_CODONS-1)
/* number of symbols in sAA below; X_AA is the index of the last one */
#define NB_AA 21
#define X_AA (NB_AA-1)
/* fixed-size name buffer (15 characters + NUL) */
typedef char NameString[16];
/* nucleotide alphabet; the position of a symbol here is its base-4 */
/* digit in sCodonIndex and fixes the order used by sMakeStdCodons  */
static char sDna[] = "ACTG";
/* one-letter amino-acid symbols searched by sAaIndex; the final '*' */
/* is presumably the stop symbol - TODO confirm against libbio       */
static char sAA[] = "ACDEFGHIKLMNPQRSTVWY*";
/* ----------------------------------------------- */
/* Convert the NUL-terminated string s to upper case in place and  */
/* return s.                                                       */
/* Each character is converted to unsigned char before it reaches  */
/* islower()/toupper(): passing a negative plain char to the       */
/* <ctype.h> functions is undefined behaviour.                     */
static char *sUpper(char *s)
{
    char *c;

    for (c = s ; *c ; c++) {
        unsigned char uc = (unsigned char) *c;

        if (islower(uc))
            *c = (char) toupper(uc);
    }

    return s;
}
/* ----------------------------------------------- */
/* Upper-case s in place (through sUpper), then replace every 'U'  */
/* by 'T'. Returns s.                                              */
static char *sUpperDna(char *s)
{
    char *p = sUpper(s);

    while (*p != '\0') {
        if (*p == 'U')
            *p = 'T';
        p++;
    }

    return s;
}
/* ----------------------------------------------- */
/* Fill buffer with the 64 triplets over sDna, each followed by    */
/* '/', and terminate it with NUL:                                 */
/*        "AAA/AAC/AAT/AAG/ACA/....../GGG/"                        */
/* The first symbol varies slowest, the third fastest.             */
/* buffer must hold at least 64 * 4 + 1 characters.                */
/* ----------------------------------------------- */
static void sMakeStdCodons(char *buffer)
{
    int  n;
    char *out = buffer;

    /* n enumerates the triplets as three base-4 digits */
    for (n = 0 ; n < 64 ; n++) {
        out[0] = sDna[(n >> 4) & 3];
        out[1] = sDna[(n >> 2) & 3];
        out[2] = sDna[n & 3];
        out[3] = '/';
        out += 4;
    }

    *out = '\000';
}
/* ----------------------------------------------- */
/* Fill buffer with every standard codon for which                 */
/* bio_codon_translate(codon, code) equals aa, in the same         */
/* "XXX/XXX/.../" layout as sMakeStdCodons, NUL-terminated.        */
/* buffer is left empty when no codon matches.                     */
static void sMakeAaCodons(char *buffer, int aa, int code)
{
    char table[4*NB_CODONS+1];
    char *trip;
    char *out = buffer;
    int  got;

    sMakeStdCodons(table);

    for (trip = table ; *trip != '\000' ; trip += 4) {
        got = bio_codon_translate(trip, code);
        if (got != aa)
            continue;
        strncpy(out, trip, 3);
        out[3] = '/';
        out += 4;
    }

    *out = '\000';
}
/* ----------------------------------------------- */
/* Return the index of codon: the base-4 number formed by the      */
/* positions of its three symbols in sDna (range [0, 63]).         */
/* Only the first three characters of codon are examined.          */
/* Return -1 if a symbol is not in sDna or if the string ends      */
/* before three symbols have been read.                            */
static int sCodonIndex(char *codon)
{
    int  i, h;
    char *p;

    for (i = h = 0 ; i < 3 ; i++) {

        /* strchr() also matches the terminating NUL of sDna, so   */
        /* an end of string has to be rejected explicitly; doing   */
        /* so also stops before reading past a truncated codon.    */
        if (codon[i] == '\000')
            return -1;

        if (! (p = strchr(sDna, codon[i])))
            return -1;

        h = (h << 2) | (int) (p - sDna);
    }

    return h;
}
/* ----------------------------------------------- */
/* Translate codon with bio_codon_translate(codon, code) and       */
/* return the position of the result in sAA, or -1 when strchr     */
/* does not find it.                                               */
/* NOTE: strchr also matches the terminator of sAA, so a           */
/* translation result of 0 yields index NB_AA rather than -1.      */
static int sAaIndex(char *codon, int code)
{
    int  aa;
    char *hit;

    aa  = bio_codon_translate(codon, code);
    hit = strchr(sAA, aa);

    if (hit == 0)
        return -1;

    return (int) (hit - sAA);
}
/* ----------------------------------------------- */
/* Return the number of characters of the NUL-terminated string    */
/* buffer that compare equal to mark.                              */
static int sCount(char *buffer, int mark)
{
    int  hits = 0;
    char *p   = buffer;

    while (*p != '\000') {
        if (*p == mark)
            hits += 1;
        p++;
    }

    return hits;
}
/* ----------------------------------------------- */
/* Skew score of codon for symbol symb: the number of occurrences  */
/* of symb in the codon minus the mean number of occurrences of    */
/* symb over all codons that translate (under code) to the same    */
/* amino acid, the codon itself included.                          */
/* Only the first three characters of codon are used.              */
/* NOTE(review): sAA is indexed with the result of sAaIndex        */
/* without checking for -1 - assumes the codon always translates   */
/* to a symbol of sAA; confirm against bio_codon_translate.        */
static float sCodonScore(char *codon, int symb, int code) {
    char triplet[4];
    char synon[4*NB_CODONS+1];
    int  iaa, own, pooled, nsyn;

    strncpy(triplet, codon, 3);
    triplet[3] = '\000';

    /* list of synonymous codons, "XXX/XXX/.../" */
    iaa = sAaIndex(triplet, code);
    sMakeAaCodons(synon, sAA[iaa], code);

    own    = sCount(triplet, symb);
    pooled = sCount(synon, symb);
    nsyn   = strlen(synon) / 4;        /* four characters per entry */

    return ((float) (own) - ((float) (pooled) / (float) nsyn));
}
/* ----------------------------------------------- */
/* Read FASTA sequences from stdin and print, for each sequence,   */
/* four skew values (one per symbol of sDna, in that order)        */
/* followed by the sequence name and its comment cut to 30         */
/* characters. Each value is the mean, over the codons recognised  */
/* by sCodonIndex, of the sign (+1, 0, -1) of the precomputed      */
/* codon score for that symbol.                                    */
/* Options: -c code, -s, -S, -p, -h (see the -h text below).       */
/* Exit status: 0 on success, 5 on bad -c value, 6 on bad option.  */
/*                                                                 */
/* Changes from the former K&R definition:                         */
/*  - prototype with explicit int return type                      */
/*  - only complete triplets are scored (a trailing partial codon  */
/*    used to be read past the end of the sequence)                */
/*  - a sequence without any scored codon prints 0 instead of      */
/*    dividing by zero                                             */
int main(int argn, char *argv[])
{
    int           i, j, k, imin, imax, nbseq, code, ncod;
    int           opt, sa_flag, so_flag, p_flag;
    float         sscore[4], score[4][NB_CODONS];
    FastaSequence *seq;
    char          *trip;
    char          codons[4*NB_CODONS+1];

    extern char *optarg;        /* externs for getopts (3C)     */

    code    = 0;                /* universal genetic code       */
    sa_flag = 0;                /* consider first codon         */
    so_flag = 0;                /* consider last codon          */
    p_flag  = 0;                /* no pretty print              */

    sMakeStdCodons(codons);

    /* ---------------------------- */
    /* parse arguments              */
    /* ---------------------------- */

    while ((opt = getopt(argn, argv, "c:hsSp")) != -1) {

        switch (opt) {

        case 'c':
            if (   (sscanf(optarg, "%d", &code) != 1)
                || (code < 0) || (code > 8)) {
                (void) printf("bad code value: -c (0-8)\n");
                exit(5);
            }
            break;

        case 'h':
            (void) printf("codon GC skew\n");
            (void) printf("usage: codon_skew [-c code]\n");
            (void) printf(" [-s] [-S] [-p]\n");
            (void) printf(" -c code\n");
            (void) printf(" 0 : universal\n");
            (void) printf(" 1 : mito yeast\n");
            (void) printf(" 2 : mito vertebrate\n");
            (void) printf(" 3 : filamentous fungi\n");
            (void) printf(" 4 : mito insects & platyhelminthes\n");
            (void) printf(" 5 : Candida cylindracea\n");
            (void) printf(" 6 : Ciliata\n");
            (void) printf(" 7 : Euplotes\n");
            (void) printf(" 8 : mito echinoderms\n");
            (void) printf(" -s : ignore first (start) codon\n");
            (void) printf(" -S : ignore last (stop) codon\n");
            (void) printf(" -p : pretty print codon score\n");
            exit(0);
            break;

        case 's':
            sa_flag = 1;
            break;

        case 'S':
            so_flag = 1;
            break;

        case 'p':
            p_flag = 1;
            break;

        case '?':
            (void) printf("usage: codon_skew [-h] [-c code]\n");
            (void) printf(" [-s] [-S] [-p]\n");
            exit(6);
            break;

        default:
            break;
        }
    }

    /* ------------------------------- */
    /* precompute score for each codon */
    /* ------------------------------- */

    for (j = 0 ; j < 4 ; j++) {
        for (i = 0, trip = codons ; i < X_CODON ; i++, trip += 4) {
            score[j][i] = sCodonScore(trip, sDna[j], code);
        }
    }

    seq = NewFastaSequence();

    nbseq = 0;

    /* optional table: codon/aa followed by its four scores */
    if (p_flag) {
        for (trip = codons ; *trip ; trip += 4) {
            printf("%3.3s", trip);
            k = sAaIndex(trip, code);
            printf("/%1c", sAA[k]);
            k = sCodonIndex(trip);
            for (j = 0; j < 4 ; j++)
                printf(" %6.2f", score[j][k]);
            printf("\n");
        }
        printf("\n");
    }

    /* ---------------------------- */
    /* loop on sequences            */
    /* ---------------------------- */

    while (ReadFastaSequence(stdin, seq)) {

        nbseq++;

        if (! seq->ok)
            (void) printf("error at seq # %d\n", nbseq);

        /* -------------------------------- */
        /* compute score                    */
        /* -------------------------------- */

        for (j = 0 ; j < 4 ; j++)
            sscore[j] = 0.;

        ncod = 0;

        imin = (sa_flag ? 3 : 0);
        imax = seq->length - (so_flag ? 3 : 0);

        /* only complete triplets: a trailing partial codon would  */
        /* make sCodonIndex read beyond the end of the sequence    */
        for (i = imin ; i + 3 <= imax ; i += 3) {

            k = sCodonIndex(seq->seq + i);

            if (k >= 0) {
                for (j = 0 ; j < 4 ; j++) {
                    sscore[j] += (score[j][k] > 0 ? 1 : (score[j][k] < 0 ? -1 : 0));
                }
                ncod++;
            }
            else
                fprintf(stderr, "invalid codon %3.3s at position %d in sequence %s\n",
                        seq->seq + i, i+1, seq->name);
        }

        /* no codon scored: keep 0 rather than divide by zero */
        if (ncod > 0) {
            for (j = 0 ; j < 4 ; j++)
                sscore[j] /= (float) ncod;
        }

        /* cut the comment to 30 characters for printing;           */
        /* NOTE(review): assumes the comment buffer holds at least  */
        /* 31 characters - confirm in libfasta.h                    */
        seq->comment[30] = '\000';

        for (j = 0 ; j < 4 ; j++)
            printf("%6.3f ", sscore[j]);

        printf(" %s %s\n", seq->name, seq->comment);
    }

    /* ---------------------------- */
    /* free memory                  */
    /* ---------------------------- */

    FreeFastaSequence(seq);

    exit(0);

    /*NOTREACHED*/
}

View File

@ -0,0 +1,445 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_tab_codon.c */
/* @desc: tabulate codon usage */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "Genetic.h"
#include "libbio.h"
#include "libfasta.h"
#ifndef Max
#define Max(i, j) ((i) > (j) ? (i) : (j))
#endif
#define NB_CODONS 65
#define NB_AA 21
#define MIN_STORAGE 1000
typedef char NameString[16];
typedef struct s_Storage {
NameString name;
int counts[NB_CODONS];
} Storage;
static char sDna[] = "ACTG";
static char sAA[] = "ACDEFGHIKLMNPQRSTVWY*";
static char sCodon[4*NB_CODONS+1];
/* ----------------------------------------------- */
/* Convert the NUL-terminated string s to upper case in place and  */
/* return s.                                                       */
/* Each character is converted to unsigned char before it reaches  */
/* islower()/toupper(): passing a negative plain char to the       */
/* <ctype.h> functions is undefined behaviour.                     */
static char *sUpper(char *s)
{
    char *c;

    for (c = s ; *c ; c++) {
        unsigned char uc = (unsigned char) *c;

        if (islower(uc))
            *c = (char) toupper(uc);
    }

    return s;
}
/* ----------------------------------------------- */
/* Upper-case s in place (through sUpper), then replace every 'U'  */
/* by 'T'. Returns s.                                              */
static char *sUpperDna(char *s)
{
    char *p = sUpper(s);

    while (*p != '\0') {
        if (*p == 'U')
            *p = 'T';
        p++;
    }

    return s;
}
/* ----------------------------------------------- */
/* Fill the file-level table sCodon with the 64 triplets over      */
/* sDna, each followed by '/', and terminate it with NUL:          */
/*        "AAA/AAC/AAT/AAG/ACA/....../GGG/"                        */
/* The first symbol varies slowest, the third fastest, so the      */
/* entry at offset 4*n is the codon whose sCodonIndex is n.        */
/* ----------------------------------------------- */
static void sMakeStdCodons()
{
    int  n;
    char *out = sCodon;

    /* n enumerates the triplets as three base-4 digits */
    for (n = 0 ; n < 64 ; n++) {
        out[0] = sDna[(n >> 4) & 3];
        out[1] = sDna[(n >> 2) & 3];
        out[2] = sDna[n & 3];
        out[3] = '/';
        out += 4;
    }

    *out = '\000';
}
/* ----------------------------------------------- */
/* Fill buffer with every entry of sCodon for which                */
/* bio_codon_translate(codon, code) equals aa, in the same         */
/* "XXX/XXX/.../" layout, NUL-terminated.                          */
/* buffer is left empty when no codon matches. sCodon must have    */
/* been filled by sMakeStdCodons beforehand.                       */
static void sMakeAaCodons(char *buffer, int aa, int code)
{
    char *trip;
    char *out = buffer;
    int  got;

    for (trip = sCodon ; *trip != '\000' ; trip += 4) {
        got = bio_codon_translate(trip, code);
        if (got != aa)
            continue;
        strncpy(out, trip, 3);
        out[3] = '/';
        out += 4;
    }

    *out = '\000';
}
/* ----------------------------------------------- */
/* Return the index of codon: the base-4 number formed by the      */
/* positions of its three symbols in sDna (range [0, 63]).         */
/* Only the first three characters of codon are examined.          */
/* Return -1 if a symbol is not in sDna or if the string ends      */
/* before three symbols have been read.                            */
static int sCodonIndex(char *codon)
{
    int  i, h;
    char *p;

    for (i = h = 0 ; i < 3 ; i++) {

        /* strchr() also matches the terminating NUL of sDna, so   */
        /* an end of string has to be rejected explicitly; doing   */
        /* so also stops before reading past a truncated codon.    */
        if (codon[i] == '\000')
            return -1;

        if (! (p = strchr(sDna, codon[i])))
            return -1;

        h = (h << 2) | (int) (p - sDna);
    }

    return h;
}
/* ----------------------------------------------- */
/* Translate codon with bio_codon_translate(codon, code) and       */
/* return the position of the result in sAA, or -1 when strchr     */
/* does not find it.                                               */
/* NOTE: strchr also matches the terminator of sAA, so a           */
/* translation result of 0 yields index NB_AA rather than -1.      */
static int sAaIndex(char *codon, int code)
{
    int  aa;
    char *hit;

    aa  = bio_codon_translate(codon, code);
    hit = strchr(sAA, aa);

    if (hit == 0)
        return -1;

    return (int) (hit - sAA);
}
/* ----------------------------------------------- */
/* Return the number of characters of the NUL-terminated string    */
/* buffer that compare equal to mark.                              */
static int sCount(char *buffer, int mark)
{
    int  hits = 0;
    char *p   = buffer;

    while (*p != '\000') {
        if (*p == mark)
            hits += 1;
        p++;
    }

    return hits;
}
/* ----------------------------------------------- */
/* Compute the relative synonymous frequency of each codon:        */
/* freq[c] = count[c] / (sum of count over all codons translating  */
/* to the same amino acid as c under code).                        */
/* count and freq are indexed like sCodonIndex; only the first     */
/* NB_CODONS-1 entries of freq are written.                        */
/* NOTE: when an amino acid has a total count of 0 the division    */
/* is 0/0 and the stored frequency is not a number.                */
static void sSynFrequency(int *count, float *freq, int code) {
    char synon[4*NB_CODONS+1];
    char triplet[4];
    char *syn;
    int  icod, iaa, total;

    triplet[3] = '\000';

    icod = 0;
    while (icod < (NB_CODONS-1)) {

        /* entry number icod of the codon table */
        strncpy(triplet, sCodon + 4 * icod, 3);

        iaa = sAaIndex(triplet, code);
        sMakeAaCodons(synon, sAA[iaa], code);

        /* pooled count of all synonymous codons */
        total = 0;
        for (syn = synon ; *syn != '\000' ; syn += 4)
            total += count[sCodonIndex(syn)];

        freq[icod] = (float) count[icod] / (float) total;

        icod++;
    }
}
/* ----------------------------------------------- */
/* GC skew score of codon: (#G - #C) in the codon minus the        */
/* frequency-weighted sum of (#G - #C) over all codons that        */
/* translate (under code) to the same amino acid, the weights      */
/* being freq[] indexed by sCodonIndex.                            */
/* Only the first three characters of codon are used.              */
/* NOTE(review): sAA is indexed with the result of sAaIndex        */
/* without checking for -1 - assumes the codon always translates   */
/* to a symbol of sAA; confirm against bio_codon_translate.        */
static float sCodonScore(char *codon, float *freq, int code) {
    char  synon[4*NB_CODONS+1];
    char  buf[4];
    char  *syn;
    int   iaa, icod;
    float own, mean;

    strncpy(buf, codon, 3);
    buf[3] = '\000';

    iaa = sAaIndex(buf, code);
    sMakeAaCodons(synon, sAA[iaa], code);

    own = (float) (sCount(buf, 'G') - sCount(buf, 'C'));

    /* buf is reused for each synonymous codon from here on */
    mean = 0.;
    syn  = synon;
    while (*syn != '\000') {
        strncpy(buf, syn, 3);
        icod = sCodonIndex(buf);
        mean += (float) (sCount(buf, 'G') - sCount(buf, 'C')) * freq[icod];
        syn += 4;
    }

    return (own - mean);
}
/* ----------------------------------------------- */
/* Grow (or first allocate, when store is NULL) an array of        */
/* Storage entries to Max(2 * *size, MIN_STORAGE) entries.         */
/* On success return the new array and update *size; on failure    */
/* return NULL and leave *size unchanged.                          */
static Storage *sIncreaseStorage(Storage *store, int *size)
{
    int     wanted = Max(*size * 2, MIN_STORAGE);
    Storage *grown;

    grown = (store ? (Storage *) realloc(store, wanted * sizeof(Storage))
                   : (Storage *) malloc(wanted * sizeof(Storage)));

    if (! grown)
        return grown;

    *size = wanted;

    return grown;
}
/* ----------------------------------------------- */
/* Copy the NB_CODONS entries of counts and the sequence name      */
/* into store; the name is cut to fit NameString and always        */
/* NUL-terminated.                                                 */
static void sCopyStorage(Storage *store, int *counts, char *name)
{
    int *dst  = store->counts;
    int *src  = counts;
    int *last = counts + NB_CODONS;

    while (src < last)
        *dst++ = *src++;

    (void) strncpy(store->name, name, sizeof(NameString));
    store->name[sizeof(NameString)-1] = '\000';
}
/* ----------------------------------------------- */
/* Read FASTA sequences from stdin, count their codons, derive     */
/* relative synonymous codon frequencies from the grand total,     */
/* score every codon with sCodonScore, then print one line per     */
/* sequence: its name and the count-weighted mean of the sign      */
/* (+1, 0, -1) of the codon scores.                                */
/* Options: -c code, -s, -S, -p, -h (see the -h text below).       */
/* Exit status: 0 on success, 5 on bad -c value, 6 on bad option,  */
/* 10 when the per-sequence storage cannot be allocated.           */
/*                                                                 */
/* Changes from the former K&R definition:                         */
/*  - prototype with explicit int return type                      */
/*  - freq[] and score[] are zeroed first: the extra slot          */
/*    NB_CODONS-1 is never computed but was read when scoring      */
/*  - only complete triplets are counted (a trailing partial       */
/*    codon used to be read past the end of the sequence)          */
/*  - a sequence without any counted codon prints 0 instead of     */
/*    dividing by zero                                             */
int main(int argn, char *argv[])
{
    int           i, k, imin, imax, nbseq, code, nstore, sign, count, sum;
    int           opt, sa_flag, so_flag, p_flag;
    int           counts[NB_CODONS], totcd[NB_CODONS];
    float         freq[NB_CODONS], score[NB_CODONS], sscore;
    Storage       *store;
    FastaSequence *seq;
    char          *trip;

    extern char *optarg;        /* externs for getopts (3C)     */

    code    = 0;                /* universal genetic code       */
    sa_flag = 0;                /* consider first codon         */
    so_flag = 0;                /* consider last codon          */
    p_flag  = 0;                /* no pretty print              */

    nstore  = 0;
    store   = NULL;

    sMakeStdCodons();

    /* ---------------------------- */
    /* parse arguments              */
    /* ---------------------------- */

    while ((opt = getopt(argn, argv, "c:hsSp")) != -1) {

        switch (opt) {

        case 'c':
            if (   (sscanf(optarg, "%d", &code) != 1)
                || (code < 0) || (code > 8)) {
                (void) printf("bad code value: -c (0-8)\n");
                exit(5);
            }
            break;

        case 'h':
            (void) printf("codon GC skew\n");
            (void) printf("usage: codon_skew [-c code]\n");
            (void) printf(" [-s] [-S] [-p]\n");
            (void) printf(" -c code\n");
            (void) printf(" 0 : universal\n");
            (void) printf(" 1 : mito yeast\n");
            (void) printf(" 2 : mito vertebrate\n");
            (void) printf(" 3 : filamentous fungi\n");
            (void) printf(" 4 : mito insects & platyhelminthes\n");
            (void) printf(" 5 : Candida cylindracea\n");
            (void) printf(" 6 : Ciliata\n");
            (void) printf(" 7 : Euplotes\n");
            (void) printf(" 8 : mito echinoderms\n");
            (void) printf(" -s : ignore first (start) codon\n");
            (void) printf(" -S : ignore last (stop) codon\n");
            (void) printf(" -p : pretty print codon score\n");
            exit(0);
            break;

        case 's':
            sa_flag = 1;
            break;

        case 'S':
            so_flag = 1;
            break;

        case 'p':
            p_flag = 1;
            break;

        case '?':
            (void) printf("usage: codon_skew [-h] [-c code]\n");
            (void) printf(" [-s] [-S] [-p]\n");
            exit(6);
            break;

        default:
            break;
        }
    }

    /* ---------------------------- */
    /* loop on sequences            */
    /* ---------------------------- */

    for (i = 0 ; i < NB_CODONS ; i++) {
        totcd[i] = 0;
        freq[i]  = 0.;          /* slot NB_CODONS-1 is never     */
        score[i] = 0.;          /* computed: keep it defined     */
    }

    seq = NewFastaSequence();

    nbseq = 0;

    while (ReadFastaSequence(stdin, seq)) {

        nbseq++;

        if (! seq->ok)
            (void) printf("error at seq # %d\n", nbseq);

        /* -------------------------------- */
        /* compute counts                   */
        /* -------------------------------- */

        for (i = 0 ; i < NB_CODONS ; i++)
            counts[i] = 0;

        imin = (sa_flag ? 3 : 0);
        imax = seq->length - (so_flag ? 3 : 0);

        /* only complete triplets: a trailing partial codon would  */
        /* make sCodonIndex read beyond the end of the sequence    */
        for (i = imin ; i + 3 <= imax ; i += 3) {

            k = sCodonIndex(seq->seq + i);

            if (k >= 0)
                counts[k]++;
            else
                fprintf(stderr, "invalid codon %3.3s at position %d in sequence %s\n",
                        seq->seq + i, i+1, seq->name);
        }

        /* -------------------------------- */
        /* compute grand total              */
        /* -------------------------------- */

        for (i = 0 ; i < NB_CODONS ; i++)
            totcd[i] += counts[i];

        /* -------------------------------- */
        /* store counts                     */
        /* -------------------------------- */

        if (nstore <= nbseq) {
            store = sIncreaseStorage(store, &nstore);
            if (! store) {
                fprintf(stderr,"not enough memory for %d sequences\n", nbseq);
                exit(10);
            }
        }

        sCopyStorage(store + nbseq - 1, counts, seq->name);
    }

    /* ------------------------------- */
    /* compute score for each codon    */
    /* ------------------------------- */

    sSynFrequency(totcd, freq, code);

    for (i = 0, trip = sCodon ; *trip ; i++, trip += 4) {
        score[i] = sCodonScore(trip, freq, code);
    }

    /* optional table: codon/aa, frequency, score */
    if (p_flag) {
        for (trip = sCodon ; *trip ; trip += 4) {
            printf("%3.3s", trip);
            k = sAaIndex(trip, code);
            printf("/%1c", sAA[k]);
            k = sCodonIndex(trip);
            printf(" %5.3f", freq[k]);
            printf(" %6.2f\n", score[k]);
        }
        printf("\n");
    }

    /* ------------------------------- */
    /* compute score for each sequence */
    /* ------------------------------- */

    for (i = 0 ; i < nbseq ; i++) {

        sscore = 0.;
        sum    = 0;

        for (k = 0 ; k < NB_CODONS ; k++) {
            sign  = (score[k] > 0 ? 1 : (score[k] < 0 ? -1 : 0));
            count = store[i].counts[k];
            sum  += count;
            sscore += (float) sign * (float) count;
        }

        /* no codon counted: keep 0 rather than divide by zero */
        if (sum > 0)
            sscore /= (float) sum;

        printf("%s %6.3f\n", store[i].name, sscore);
    }

    /* ---------------------------- */
    /* free memory                  */
    /* ---------------------------- */

    FreeFastaSequence(seq);

    free(store);

    exit(0);

    /*NOTREACHED*/
}

View File

@ -0,0 +1,80 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_complinv.c */
/* @desc: complinv a fasta sequence */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "libbio.h"
#include "libfasta.h"
/* ----------------------------------------------- */
/*ARGSUSED*/
/* Read FASTA sequences from stdin and write the reverse           */
/* complement of each one to stdout, with " (complement)"          */
/* appended to its comment. Sequences whose ok flag is cleared     */
/* are reported and skipped.                                       */
/* Options: -h prints the usage text.                              */
/* Exit status: 0 on success, 6 on bad option.                     */
/* Defined with a prototype and an explicit int return type (the   */
/* former K&R form relied on implicit int, invalid since C99).     */
int main(int argn, char *argv[])
{
    int           nbseq, opt;
    FastaSequence *seq;

    while ((opt = getopt(argn, argv, "h")) != -1) {

        switch (opt) {

        case 'h':
            (void) printf("complement invert fasta sequence[s]\n");
            (void) printf("usage: complinv\n");
            exit(0);
            break;

        case '?':
            (void) printf("usage: complinv\n");
            exit(6);
            break;

        default:
            break;
        }
    }

    seq = NewFastaSequence();

    nbseq = 0;

    while (ReadFastaSequence(stdin, seq)) {

        nbseq++;

        if (! seq->ok) {
            (void) printf("error at seq # %d\n", nbseq);
            continue;
        }

        /* complement, then reverse, both in place */
        (void) bio_seq_complement(seq->seq);
        (void) str_reverse_string(seq->seq);

        /* NOTE(review): assumes the comment buffer has room for   */
        /* 13 more characters - confirm its size in libfasta.h     */
        (void) strcat(seq->comment, " (complement)");

        WriteFastaSequence(stdout, seq, FASTA_CHAR_PER_LINE);
    }

    FreeFastaSequence(seq);

    exit(0);

    /*NOTREACHED*/
}

View File

@ -0,0 +1,221 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_cut.c */
/* @desc: cut a fasta sequence */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "libbio.h"
#include "libfasta.h"
/* ----------------------------------------------- */
/* Copy the NUL-terminated string s2 (terminator included) to s1.  */
/* The two areas may overlap in either direction: memmove is used  */
/* because a plain forward copy overwrites the terminator of s2    */
/* and never stops when s1 lies inside s2 after its start.         */
static void sStrCpy(char *s1, char *s2)
{
    (void) memmove(s1, s2, strlen(s2) + 1);
}
/* ----------------------------------------------- */
/* Write the fragment [from, to] (1-based, inclusive) of seq to    */
/* stdout as a FASTA sequence.                                     */
/*   from, to : a value <= 0 is counted from the sequence length   */
/*              (position = length + value)                        */
/*   rev      : non-zero to output the reverse complement          */
/*   name     : name of the output sequence; seq->name if NULL     */
/* The comment of the output is "fragment <from> <to> <+|->".      */
/* Bad boundaries are reported on stderr and nothing is written;   */
/* this now includes a start position before the first symbol,     */
/* which used to read in front of the sequence buffer.             */
/* A single static work sequence is reused between calls.          */
static void sPrintSequence(FastaSequence *seq, int from, int to, int rev, char *name) {

    int ifrom, ito, ilen;

    static FastaSequence *bufSeq = NULL;

    if (bufSeq == NULL)
        bufSeq = NewFastaSequence();

    ifrom = ((from > 0) ? from : seq->length + from);
    ito   = ((to   > 0) ? to   : seq->length + to);

    if ((ifrom < 1) || (ito > seq->length) || (ifrom > ito)) {
        fprintf(stderr, "bad from to values (%d %d)\n", from, to);
        return;
    }

    ilen = ito - ifrom + 1;

    /* make room for ilen symbols + NUL */
    if (bufSeq->length <= ilen) {
        bufSeq->seq = REALLOC(char, bufSeq->seq, ilen + 1);
        if (! bufSeq->seq) {
            (void) fprintf(stderr, "not enough memory\n");
            exit(10);
        }
    }

    (void) strncpy(bufSeq->seq, seq->seq + ifrom - 1, ilen);
    bufSeq->seq[ilen] = '\000';
    bufSeq->length = ilen;

    (void) strcpy(bufSeq->name, name ? name : seq->name);
    (void) sprintf(bufSeq->comment, "fragment %d %d %s", ifrom, ito, (rev ? "-" : "+"));

    /* complement, then reverse, both in place */
    if (rev) {
        (void) bio_seq_complement(bufSeq->seq);
        (void) str_reverse_string(bufSeq->seq);
    }

    WriteFastaSequence(stdout, bufSeq, FASTA_CHAR_PER_LINE);
}
/* ----------------------------------------------- */
/* cut: extract fragment(s) of fasta sequence(s)   */
/* read on stdin and write them on stdout          */
/*   without -i : each input sequence is cut with  */
/*                the -f / -t / -r settings        */
/*   with -i    : only the first input sequence is */
/*                read; one fragment is written    */
/*                per usable line of the list file */
/* exit status : 0 ok, 3 bad option value,         */
/*               6 unknown option,                 */
/*               7 list file cannot be opened,     */
/*               8 first sequence unreadable (-i)  */
/*ARGSUSED*/
int main(int argn, char *argv[])
{
    int nbseq, from, to, opt, ropt, iread;
    FastaSequence *seq;
    FILE *filin = NULL;
    char strand[2], iopt[FILENAME_MAX], buffer[BUFSIZ], name[BUFSIZ];

    extern char *optarg;    /* externs for getopts (3C) */

    ropt = 0;
    *iopt = '\000';
    from = 1;
    to = 0;

    while ((opt = getopt(argn, argv, "f:ht:ri:")) != -1) {
        switch (opt) {
        case 'h':
            (void) printf("cut fasta sequence[s]\n");
            (void) printf("usage: cut [-f from] [-t to] [-r] [-i file]\n");
            (void) printf(" cut fragment [from, to] (inclusive)\n");
            (void) printf(" -f from\n");
            (void) printf(" from > 0 : range is [from, to]\n");
            (void) printf(" from <= 0 : range is [N-|from|, to]\n");
            (void) printf(" N = sequence length\n");
            (void) printf(" default: 1\n");
            (void) printf(" -t to\n");
            (void) printf(" to > 0 : range is [from, to]\n");
            (void) printf(" to <= 0 : range is [from, N-|to|]\n");
            (void) printf(" N = sequence length\n");
            (void) printf(" default: 0\n");
            (void) printf(" -r\n");
            (void) printf(" reverse complement result sequence\n");
            (void) printf(" -i file\n");
            (void) printf(" use from,to boundaries from list file\n");
            (void) printf(" format : from to [D|R|C|+|- [name]] per line\n");
            exit(0);
            break;
        case 'f':
            if (sscanf(optarg, "%d", &from) != 1) {
                (void) fprintf(stderr, "bad from value\n");
                exit(3);
            }
            break;
        case 't':
            if (sscanf(optarg, "%d", &to) != 1) {
                (void) fprintf(stderr, "bad to value\n");
                exit(3);
            }
            break;
        case 'r':
            ropt = 1;
            break;
        case 'i':
            /* %s is unbounded: refuse names that cannot fit in iopt */
            if (   (strlen(optarg) >= sizeof(iopt))
                || (sscanf(optarg, "%s", iopt) != 1)) {
                (void) fprintf(stderr, "bad file value\n");
                exit(3);
            }
            break;
        case '?':
            (void) fprintf(stderr, "usage: cut [-f from] [-t to] [-h]\n");
            exit(6);
            break;
        }
    }

    if (*iopt && (! (filin = fopen(iopt, "r")))) {
        (void) fprintf(stderr, "%s: file not found\n", iopt);
        exit(7);
    }

    seq = NewFastaSequence();
    nbseq = 0;

    if (! *iopt) {
        /* same boundaries applied to every input sequence */
        while (ReadFastaSequence(stdin, seq)) {
            nbseq++;
            if (! seq->ok) {
                (void) fprintf(stderr, "error at seq # %d\n", nbseq);
                continue;
            }
            sPrintSequence(seq, from, to, ropt, seq->name);
        }
    }
    else {
        /* boundaries come from the list file, applied to the first sequence */
        iread = ReadFastaSequence(stdin, seq);
        nbseq++;
        if ((! iread) || (! seq->ok)) {
            (void) fprintf(stderr, "error at seq # %d\n", nbseq);
            exit(8);
        }
        while (fgets(buffer, sizeof(buffer), filin)) {
            /* name cannot overflow: it is as large as the line buffer */
            iread = sscanf(buffer, "%d%d%1s%s", &from, &to, strand, name);
            if (iread < 2) {
                fprintf(stderr, "ignored boundaries at line \"%s\"\n", buffer);
                continue;
            }
            /* NOTE(review): a line without strand keeps the strand of the */
            /* previous line (or -r) - confirm this is intended            */
            if (iread >= 3) {
                ropt = (((*strand == '-') || (*strand == 'R') || (*strand == 'C')) ? 1 : 0);
            }
            if (iread < 4) {
                (void) strcpy(name, seq->name);
            }
            sPrintSequence(seq, from, to, ropt, name);
        }
        fclose(filin);
    }

    FreeFastaSequence(seq);
    exit(0);
    /*NOTREACHED*/
}

View File

@ -0,0 +1,177 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_skew.c */
/* @desc: compute GC skew */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "libbio.h"
#include "libfasta.h"
#define WINDOW_DFT -25 /* aka 25% */
#define STEP_DFT 10000
#define NB_NUC 4
#define A 0
#define C 1
#define G 2
#define T 3
static char sDna[] = "ACGT";
/* ----------------------------------------------- */
/* replace in place each character of seq by its   */
/* rank in alpha; characters absent from alpha get */
/* the value strlen(alpha). returns seq.           */
/* after the call seq holds small integers (rank 0 */
/* included), not a printable string               */
static char *sIndexSeq(char *seq, char *alpha) {
    int unknown = strlen(alpha);
    char *cur = seq;

    while (*cur != '\000') {
        char *hit = strchr(alpha, *cur);

        if (hit != NULL)
            *cur = (int) (hit - alpha);
        else
            *cur = unknown;
        cur++;
    }

    return seq;
}
/* ----------------------------------------------- */
/* accumulate symbol counts over seq[from, to),    */
/* positions being taken modulo len (circular)     */
/*   seq    : indexed sequence (ranks, not chars)  */
/*   count  : NB_NUC counters, one per rank        */
/*   factor : value added per occurrence; 0 means  */
/*            "reset count[] then add 1"           */
/* ranks outside [0, NB_NUC) are skipped so that   */
/* symbols indexed as "unknown" cannot write past  */
/* the end of count[]                              */
static void sCount(char *seq, int from, int to, int len, int *count, int factor) {
    int i, j;

    if (factor == 0) {
        for (i = 0 ; i < NB_NUC ; i++)
            count[i] = 0;
        factor = 1;
    }

    for (i = from ; i < to ; i++) {
        j = seq[i%len];
        if ((j >= 0) && (j < NB_NUC))
            count[j] += factor;
    }
}
/* ----------------------------------------------- */
/* util_skew: for each fasta sequence on stdin,    */
/* print "position cumulated-skew" lines where the */
/* skew of a window is (G - C) / (G + C) and the   */
/* window [pos - width, pos + width) is circular   */
/*   -s step  : distance between positions         */
/*   -w width : half window size in symbols        */
/*   -W pct   : half window size in % of length    */
/* exit status : 0 ok, 5 bad option value,         */
/*               6 unknown option                  */
/*ARGSUSED*/
int main(int argn, char *argv[])
{
    int nbseq, opt, len, pos;
    int window, width, step;
    /* one extra slot: sIndexSeq gives rank NB_NUC to non ACGT symbols */
    int count[NB_NUC + 1] = { 0 };
    float skew;
    FastaSequence *seq;

    extern char *optarg;    /* externs for getopts (3C) */

    window = WINDOW_DFT;    /* default window size (negative = percent) */
    step = STEP_DFT;        /* default step size */

    while ((opt = getopt(argn, argv, "hs:w:W:")) != -1) {
        switch (opt) {
        case 'h':
            (void) printf("compute GC skew\n");
            (void) printf("usage: util_skew [-w width] [-s step]\n");
            exit(0);
            break;
        case 's':
            if (   (sscanf(optarg, "%d", &step) != 1)
                || (step <= 0) ) {
                (void) printf("bad step value\n");
                exit(5);
            }
            break;
        case 'w':
            if (   (sscanf(optarg, "%d", &window) != 1)
                || (window <= 0) ) {
                (void) printf("bad window value\n");
                exit(5);
            }
            break;
        case 'W':
            if (   (sscanf(optarg, "%d", &window) != 1)
                || (window <= 0)
                || (window > 100)) {
                (void) printf("bad window percent value\n");
                exit(5);
            }
            window = -window;   /* negative value flags a percentage */
            break;
        case '?':
            (void) printf("usage: util_skew [-w width] [-s step]\n");
            exit(6);
            break;
        }
    }

    seq = NewFastaSequence();
    nbseq = 0;

    while (ReadFastaSequence(stdin, seq)) {
        nbseq++;
        if (! seq->ok) {
            (void) printf("error at seq # %d\n", nbseq);
            continue;
        }

        /* len must not depend on width, which is only computed below */
        len = seq->length;

        if (window > 0)
            width = window;
        else    /* same value as len * (-window) / 100, without int overflow */
            width = (len / 100) * (-window) + ((len % 100) * (-window)) / 100;

        (void) sIndexSeq(seq->seq, sDna);

        skew = 0.;
        for (pos = 0 ; pos < len ; pos += step) {
            if ((pos == 0) || (width < step)) {
                /* full recount of the window */
                sCount(seq->seq, pos + len - width, pos + len + width, len, count, 0);
            }
            else {
                /* slide: drop the symbols leaving, add those entering */
                sCount(seq->seq, pos - step + len - width, pos + len - width, len, count, -1);
                sCount(seq->seq, pos - step + width, pos + width, len, count, 1);
            }
            skew += (float) (count[G] - count[C]) / (float) (count[G] + count[C]);
            printf("%d %f\n", pos, skew);
        }
    }

    FreeFastaSequence(seq);
    exit(0);
    /*NOTREACHED*/
}

View File

@ -0,0 +1,377 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_skew.c */
/* @desc: compute GC skew */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "libbio.h"
#include "libfasta.h"
#define WINDOW_DFT -25 /* aka 25% */
#define STEP_DFT 300
#define NB_NUC 4
static char sDna[] = "ACGT";
/* ----------------------------------------------- */
/* remove invalid and duplicate symbols in */
/* alphabet */
static int sCleanAlpha(char *alpha, char *clean) {
char *c, *cc;
for (c = alpha, cc = clean; *c ; c++) {
if ( (strchr(sDna, *c) != NULL)
&& (strchr(alpha, *c) == c))
*cc++ = *c;
}
*cc = '\000';
return strlen(clean);
}
/* ----------------------------------------------- */
/* replace in place each character of seq by :     */
/*   -1            if it is not a symbol of sDna   */
/*   rank in alpha if it belongs to alpha          */
/*   strlen(alpha) otherwise                       */
/* returns seq                                     */
static char *sIndexSeq(char *seq, char *alpha) {
    int other = strlen(alpha);
    char *cur;

    for (cur = seq ; *cur != '\000' ; cur++) {
        char *hit;

        if (strchr(sDna, *cur) == NULL) {
            *cur = -1;
            continue;
        }
        hit = strchr(alpha, *cur);
        if (hit == NULL)
            *cur = other;
        else
            *cur = (int) (hit - alpha);
    }

    return seq;
}
/* ----------------------------------------------- */
/* accumulate symbol counts over seq[from, to),    */
/* positions being taken modulo len (circular)     */
/*   seq    : indexed sequence (ranks, not chars)  */
/*   count  : NB_NUC counters, one per rank        */
/*   factor : value added per occurrence; 0 means  */
/*            "reset count[] then add 1"           */
/* ranks outside [0, NB_NUC) are skipped: the -1   */
/* marker stored in a plain char reads back as 255 */
/* where char is unsigned and must not be used as  */
/* an index                                        */
static void sCount(char *seq, int from, int to, int len, int *count, int factor) {
    int i, j;

    if (factor == 0) {
        for (i = 0 ; i < NB_NUC ; i++)
            count[i] = 0;
        factor = 1;
    }

    for (i = from ; i < to ; i++) {
        j = seq[i%len];
        if ((j >= 0) && (j < NB_NUC))
            count[j] += factor;
    }
}
/* ----------------------------------------------- */
/* ----------------------------------------------- */
#define SQRTF(x) (float) sqrt((float) x)
#define FABSF(x) (float) fabs((float) x)
/* ---------------------- */
/* error handler of the numerical routines: print  */
/* a banner and error_text on stderr, then         */
/* terminate the program with status 1             */
static void nrerror(char *error_text)
{
    FILE *out = stderr;

    fprintf(out, "Numerical Recipes run-time error...\n");
    fprintf(out, "%s\n", error_text);

    exit(1);
}
/* ---------------------- */
static float gammln(float xx)
{
double x, y, tmp, ser;
static double cof[6] = {
76.18009172947146, -86.50532032941677,
24.01409824083091, -1.231739572450155,
0.1208650973866179e-2, -0.5395239384953e-5
};
int j;
y = x = xx;
tmp = x + 5.5;
tmp -= (x+0.5) * log(tmp);
ser = 1.000000000190015;
for (j = 0 ; j <= 5 ; j++)
ser += cof[j] / ++y;
return -tmp + log(2.5066282746310005 * ser / x);
}
/* ---------------------- */
#define ITMAX 100
#define EPS 3.0e-7
#define FPMIN 1.0e-30
/* compute *gammcf with an iterative recurrence    */
/* on (a, x) and store gammln(a) in *gln           */
/* the iteration stops as soon as the update       */
/* factor del is within EPS of 1; if this does not */
/* happen within ITMAX rounds nrerror() is called  */
/* NOTE(review): presumably the continued fraction */
/* form of the incomplete gamma function Q(a,x) of */
/* Numerical Recipes ("gcf") - confirm             */
static void gcf(float *gammcf, float a, float x, float *gln)
{
int i;
float an, b, c, d, del, h;
*gln = gammln(a);
/* starting values of the recurrence */
b = x + 1.0 - a;
c = 1.0/FPMIN;
d = 1.0/b;
h = d;
for (i = 1 ; i <= ITMAX ; i++) {
an = -i*(i-a);
b += 2.0;
d = an*d + b;
/* keep d and c away from zero before they are used as divisors */
if (FABSF(d) < FPMIN)
d = FPMIN;
c = b + an/c;
if (FABSF(c) < FPMIN)
c=FPMIN;
d = 1.0/d;
del = d*c;
h *= del;
/* converged: the last factor no longer changes h */
if (FABSF(del-1.0) < EPS)
break;
}
/* i only exceeds ITMAX when the loop ended without the break above */
if (i > ITMAX)
nrerror("a too large, ITMAX too small in gcf");
*gammcf = exp (-x+a*log(x)-(*gln)) * h;
}
#undef ITMAX
#undef EPS
#undef FPMIN
/* ---------------------- */
#define ITMAX 100
#define EPS 3.0e-7
/* compute *gamser from a series in x summed until */
/* a term is smaller than EPS times the running    */
/* sum, and store gammln(a) in *gln                */
/*   x == 0 : *gamser is set to 0                  */
/*   x <  0 : nrerror() is called                  */
/*   no convergence within ITMAX terms : nrerror() */
/* NOTE(review): presumably the series form of the */
/* incomplete gamma function P(a,x) of Numerical   */
/* Recipes ("gser") - confirm                      */
static void gser(float *gamser, float a, float x, float *gln)
{
int n;
float sum, del, ap;
*gln = gammln(a);
if (x <= 0.0) {
if (x < 0.0)
nrerror("x less than 0 in routine gser");
*gamser=0.0;
return;
} else {
/* first term of the series is 1/a */
ap = a;
del = sum = 1.0/a;
for (n = 1 ; n <= ITMAX ; n++) {
/* next term = previous term * x / (a + n) */
++ap;
del *= x/ap;
sum += del;
if (FABSF(del) < FABSF(sum)*EPS) {
*gamser = sum * exp(-x+a*log(x)-(*gln));
return;
}
}
/* only reached when the loop ran ITMAX times without converging */
nrerror("a too large, ITMAX too small in routine gser");
return;
}
}
#undef ITMAX
#undef EPS
/* ---------------------- */
/* return a value computed by gcf() when            */
/* x >= a + 1, and 1 minus the value computed by    */
/* gser() otherwise; x < 0 or a <= 0 ends in        */
/* nrerror()                                        */
/* NOTE(review): presumably the incomplete gamma    */
/* function Q(a,x) of Numerical Recipes - confirm   */
static float gammq(float a, float x)
{
    float series, fraction, lnGamma;

    if (x < 0.0 || a <= 0.0)
        nrerror("Invalid arguments in routine gammq");

    if (! (x < (a+1.0))) {
        gcf(&fraction, a, x, &lnGamma);
        return fraction;
    }

    gser(&series, a, x, &lnGamma);
    return 1.0-series;
}
/* ---------------------- */
/* compare two sets of nbins counts                 */
/*   *chsq : sum over the bins of                   */
/*           (bins1*c1 - bins2*c2)^2 / (bins1+bins2)*/
/*           with c1 = sqrt(sum2/sum1) and          */
/*           c2 = sqrt(sum1/sum2)                   */
/*   *df   : nbins - 1, minus one per bin that is   */
/*           empty in both sets                     */
/*   *prob : gammq(df / 2, chsq / 2)                */
/* NOTE(review): presumably the chi-square test for */
/* two binned data sets of Numerical Recipes        */
/* ("chstwo") - confirm                             */
void chstwo(int bins1[], int bins2[], int nbins,
            float *df, float *chsq, float *prob)
{
    int bin;
    float diff, sum1, sum2, scale1, scale2;

    sum1 = sum2 = 0.;
    for (bin = 0 ; bin < nbins ; bin++) {
        sum1 += bins1[bin];
        sum2 += bins2[bin];
    }

    scale1 = SQRTF(sum2/sum1);
    scale2 = SQRTF(sum1/sum2);

    *df = nbins-1;
    *chsq = 0.0;

    for (bin = 0 ; bin < nbins ; bin++) {
        if ((bins1[bin] == 0) && (bins2[bin] == 0)) {
            --(*df);            /* empty bin: one degree of freedom less */
            continue;
        }
        diff = bins1[bin]*scale1 - bins2[bin]*scale2;
        *chsq += diff*diff/(bins1[bin]+bins2[bin]);
    }

    *prob = gammq(0.5*(*df),0.5*(*chsq));
}
/* ----------------------------------------------- */
/* for each fasta sequence on stdin and each        */
/* position pos (circular), compare with chstwo()   */
/* the symbol counts of the window to the right of  */
/* pos, [pos, pos + width), with those of the       */
/* window to its left, [pos - width, pos), and      */
/* print "pos chi2 probability"                     */
/*   -a alpha : symbols (among ACGT) counted in     */
/*              separate bins; the others share one */
/*   -s step  : distance between positions          */
/*   -w width : window size in symbols              */
/*   -W pct   : window size in % of length          */
/* exit status : 0 ok, 5 bad option value,          */
/*               6 unknown option                   */
/*ARGSUSED*/
int main(int argn, char *argv[])
{
    int nbseq, opt, len, nbins, pos, sign;
    int countLeft[NB_NUC], countRight[NB_NUC];
    int window, width, step;
    char alpha[NB_NUC + 1];
    float chi, proba, df;
    FastaSequence *seq;

    extern char *optarg;    /* externs for getopts (3C) */

    step = STEP_DFT;        /* default step size */
    window = WINDOW_DFT;    /* default window (negative = percent of length) */
    (void) strcpy(alpha, sDna);

    while ((opt = getopt(argn, argv, "a:hs:w:W:")) != -1) {
        switch (opt) {
        case 'a':
            /* length is checked first so that the cleaned copy fits in alpha */
            if (   (strlen(optarg) > NB_NUC)
                || (sCleanAlpha(optarg, alpha) == 0)) {
                (void) printf("bad alpha value\n");
                exit(5);
            }
            break;
        case 'h':
            (void) printf("compute GC skew\n");
            (void) printf("usage: util_skew [-w|W width] [-s step]\n");
            exit(0);
            break;
        case 's':
            if (   (sscanf(optarg, "%d", &step) != 1)
                || (step <= 0) ) {
                (void) printf("bad step value\n");
                exit(5);
            }
            break;
        case 'w':
            if (   (sscanf(optarg, "%d", &window) != 1)
                || (window <= 0) ) {
                (void) printf("bad window value\n");
                exit(5);
            }
            break;
        case 'W':
            if (   (sscanf(optarg, "%d", &window) != 1)
                || (window <= 0)
                || (window > 100)) {
                (void) printf("bad window percent value\n");
                exit(5);
            }
            window = -window;   /* negative value flags a percentage */
            break;
        case '?':
            (void) printf("usage: util_skew [-w|W width] [-s step]\n");
            exit(6);
            break;
        }
    }

    /* one bin per symbol of alpha, plus one shared by the remaining */
    /* nucleotides when alpha does not hold all of them              */
    nbins = strlen(alpha);
    if (nbins < NB_NUC)
        nbins++;

    seq = NewFastaSequence();
    nbseq = 0;

    while (ReadFastaSequence(stdin, seq)) {
        nbseq++;
        if (! seq->ok) {
            (void) printf("error at seq # %d\n", nbseq);
            continue;
        }

        len = seq->length;

        if (window > 0)
            width = window;
        else    /* same value as len * (-window) / 100, without int overflow */
            width = (len / 100) * (-window) + ((len % 100) * (-window)) / 100;

        /* index sequence */
        (void) sIndexSeq(seq->seq, alpha);

        /* rotate origin */
        for (pos = 0 ; pos < len; pos += step) {
            if ((pos == 0) || (width < step)) {
                /* full recount of both windows */
                sCount(seq->seq, pos, pos + width, len, countRight, 0);
                sCount(seq->seq, pos + len - width, pos + len, len, countLeft, 0);
            }
            else {
                /* slide both windows by step */
                sCount(seq->seq, pos - step, pos, len, countRight, -1);
                sCount(seq->seq, pos - step + width, pos + width, len, countRight, 1);
                sCount(seq->seq, pos - step + len - width, pos + len - width, len, countLeft, -1);
                sCount(seq->seq, pos - step, pos, len, countLeft, 1);
            }

            chstwo(countRight, countLeft, nbins, &df, &chi, &proba);

            /* add a sign (only meaningful with two bins) */
            sign = ((nbins == 2) ? ((countRight[0] >= countLeft[0]) ? 1 : -1)
                                 : 1);

            printf("%d %f %g\n", pos, chi * sign, proba);
        }
    }

    FreeFastaSequence(seq);
    exit(0);
    /*NOTREACHED*/
}

View File

@ -0,0 +1,179 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_skew.c */
/* @desc: compute GC skew */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "libbio.h"
#include "libfasta.h"
#define WINDOW_DFT -25 /* aka 25% */
#define STEP_DFT 10000
#define NB_NUC 4
static char sDna[] = "ACGT";
/* ----------------------------------------------- */
/* turn seq, in place, into a vector of ranks: a   */
/* character found in alpha becomes its position   */
/* in alpha, any other one becomes strlen(alpha).  */
/* returns seq                                     */
static char *sIndexSeq(char *seq, char *alpha) {
    int nsym = strlen(alpha);
    char *cur;

    for (cur = seq ; *cur != '\000' ; cur++) {
        char *found = strchr(alpha, *cur);

        if (found == NULL) {
            *cur = nsym;
            continue;
        }
        *cur = (int) (found - alpha);
    }

    return seq;
}
/* ----------------------------------------------- */
/* accumulate symbol counts over seq[from, to),    */
/* positions being taken modulo len (circular)     */
/*   seq    : indexed sequence (ranks, not chars)  */
/*   count  : counters indexed by rank; only the   */
/*            first NB_NUC are reset               */
/*   factor : value added per occurrence; 0 means  */
/*            "reset count[] then add 1"           */
static void sCount(char *seq, int from, int to, int len, int *count, int factor) {
    int pos, rank;
    int weight = factor;

    if (weight == 0) {
        for (rank = 0 ; rank < NB_NUC ; rank++)
            count[rank] = 0;
        weight = 1;
    }

    pos = from;
    while (pos < to) {
        rank = seq[pos%len];
        if (rank >= 0)
            count[rank] += weight;
        pos++;
    }
}
/* ----------------------------------------------- */
/* read one fasta sequence on stdin and, for each   */
/* position pos (circular), print pos followed by   */
/* one value per nucleotide of "ACGT":              */
/*   100 * (right - left) / (right + left)          */
/* where right counts the window [pos, pos + width) */
/* and left the window [pos - width, pos)           */
/*   -s step  : distance between positions          */
/*   -w width : window size in symbols              */
/*   -W pct   : window size in % of length          */
/* exit status : 0 ok, 3 unreadable sequence,       */
/*               5 bad option value,                */
/*               6 unknown option                   */
/*ARGSUSED*/
int main(int argn, char *argv[])
{
    int opt, len, pos, nuc;
    /* slot NB_NUC receives the non ACGT symbols; it is never reset */
    /* by sCount, hence the explicit initialisation                */
    int countLeft[NB_NUC + 1] = { 0 }, countRight[NB_NUC + 1] = { 0 };
    int window, width, step;
    float skew;
    FastaSequence *seq;

    extern char *optarg;    /* externs for getopts (3C) */

    step = STEP_DFT;        /* default step size */
    window = WINDOW_DFT;    /* default window (negative = percent of length) */

    while ((opt = getopt(argn, argv, "hs:w:W:")) != -1) {
        switch (opt) {
        case 'h':
            (void) printf("compute GC skew\n");
            (void) printf("usage: util_skew [-w|W width] [-s step]\n");
            exit(0);
            break;
        case 's':
            if (   (sscanf(optarg, "%d", &step) != 1)
                || (step <= 0) ) {
                (void) printf("bad step value\n");
                exit(5);
            }
            break;
        case 'w':
            if (   (sscanf(optarg, "%d", &window) != 1)
                || (window <= 0) ) {
                (void) printf("bad window value\n");
                exit(5);
            }
            break;
        case 'W':
            if (   (sscanf(optarg, "%d", &window) != 1)
                || (window <= 0)
                || (window > 100)) {
                (void) printf("bad window percent value\n");
                exit(5);
            }
            window = -window;   /* negative value flags a percentage */
            break;
        case '?':
            (void) printf("usage: util_skew [-w|W width] [-s step]\n");
            exit(6);
            break;
        }
    }

    seq = NewFastaSequence();

    if ((! ReadFastaSequence(stdin, seq)) || (! seq->ok)) {
        (void) printf("error while reading sequence\n");
        exit(3);
    }

    len = seq->length;

    if (window > 0)
        width = window;
    else    /* same value as len * (-window) / 100, without int overflow */
        width = (len / 100) * (-window) + ((len % 100) * (-window)) / 100;

    /* index sequence */
    (void) sIndexSeq(seq->seq, sDna);

    /* rotate origin */
    for (pos = 0 ; pos < len; pos += step) {
        if ((pos == 0) || (width < step)) {
            /* full recount of both windows */
            sCount(seq->seq, pos, pos + width, len, countRight, 0);
            sCount(seq->seq, pos + len - width, pos + len, len, countLeft, 0);
        }
        else {
            /* slide both windows by step */
            sCount(seq->seq, pos - step, pos, len, countRight, -1);
            sCount(seq->seq, pos - step + width, pos + width, len, countRight, 1);
            sCount(seq->seq, pos - step + len - width, pos + len - width, len, countLeft, -1);
            sCount(seq->seq, pos - step, pos, len, countLeft, 1);
        }

        printf("%d", pos);
        for (nuc = 0 ; nuc < NB_NUC ; nuc++) {
            skew = (float) (countRight[nuc] + countLeft[nuc]);
            /* NOTE(review): a nucleotide absent from both windows is */
            /* reported as 100 - confirm this is the wanted value     */
            if (skew == 0.)
                skew = 1.;
            else
                skew = (float) (countRight[nuc] - countLeft[nuc]) / skew;
            /* was "%f6.2", which prints the value followed by "6.2" */
            printf(" %6.2f", skew * 100.);
        }
        printf("\n");
    }

    FreeFastaSequence(seq);
    exit(0);
}

View File

@ -0,0 +1,160 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_skew.c */
/* @desc: compute GC skew */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "libbio.h"
#include "libfasta.h"
#define WINDOW_DFT 1000
#define STEP_DFT 300
/* ----------------------------------------------- */
/* return the number of occurrences of symbol c in */
/* seq[from, to), positions being taken modulo len */
/* (circular sequence)                             */
static int sCount(char *seq, int from, int to, int len, int c) {
    int hits = 0;
    int pos = from;

    while (pos < to) {
        hits += (seq[pos%len] == c);
        pos++;
    }

    return hits;
}
/* ----------------------------------------------- */
/* for each fasta sequence on stdin and each        */
/* position pos (circular), print                   */
/*   "pos skew(right) - skew(left)"                 */
/* where skew = (G - C) / (G + C), right is the     */
/* window [pos, pos + width) and left the window    */
/* [pos - width, pos)                               */
/*   -s step  : distance between positions          */
/*   -w width : window size in symbols              */
/*   -W pct   : window size in % of length          */
/* exit status : 0 ok, 5 bad option value,          */
/*               6 unknown option                   */
/*ARGSUSED*/
int main(int argn, char *argv[])
{
    int nbseq, opt, len, pos;
    int ng[2], nc[2], dg[2], dc[2];   /* [0] right window, [1] left window */
    int window, width, step;
    float skew;
    FastaSequence *seq;

    extern char *optarg;    /* externs for getopts (3C) */

    step = STEP_DFT;        /* default step size */
    window = WINDOW_DFT;    /* default window size, in symbols */

    while ((opt = getopt(argn, argv, "hs:w:W:")) != -1) {
        switch (opt) {
        case 'h':
            (void) printf("compute GC skew\n");
            (void) printf("usage: util_skew [-w|W width] [-s step]\n");
            exit(0);
            break;
        case 's':
            if (   (sscanf(optarg, "%d", &step) != 1)
                || (step <= 0) ) {
                (void) printf("bad step value\n");
                exit(5);
            }
            break;
        case 'w':
            if (   (sscanf(optarg, "%d", &window) != 1)
                || (window <= 0) ) {
                (void) printf("bad window value\n");
                exit(5);
            }
            break;
        case 'W':
            if (   (sscanf(optarg, "%d", &window) != 1)
                || (window <= 0)
                || (window > 100)) {
                (void) printf("bad window percent value\n");
                exit(5);
            }
            window = -window;   /* negative value flags a percentage */
            break;
        case '?':
            (void) printf("usage: util_skew [-w|W width] [-s step]\n");
            exit(6);
            break;
        }
    }

    seq = NewFastaSequence();
    nbseq = 0;

    while (ReadFastaSequence(stdin, seq)) {
        nbseq++;
        if (! seq->ok) {
            (void) printf("error at seq # %d\n", nbseq);
            continue;
        }

        len = seq->length;

        if (window > 0)
            width = window;
        else    /* same value as len * (-window) / 100, without int overflow */
            width = (len / 100) * (-window) + ((len % 100) * (-window)) / 100;

        /* rotate origin */
        for (pos = 0 ; pos < len; pos += step) {
            if ((pos == 0) || (width < step)) {
                /* full recount of both windows */
                ng[0] = sCount(seq->seq, pos, pos + width, len, 'G');
                nc[0] = sCount(seq->seq, pos, pos + width, len, 'C');
                ng[1] = sCount(seq->seq, pos + len - width, pos + len, len, 'G');
                nc[1] = sCount(seq->seq, pos + len - width, pos + len, len, 'C');
            }
            else {
                /* symbols of [pos - step, pos) leave the right window */
                /* and enter the left one                              */
                dg[0] = sCount(seq->seq, pos - step, pos, len, 'G');
                dc[0] = sCount(seq->seq, pos - step, pos, len, 'C');

                /* symbols entering the right window */
                dg[1] = sCount(seq->seq, pos - step + width, pos + width, len, 'G');
                dc[1] = sCount(seq->seq, pos - step + width, pos + width, len, 'C');
                ng[0] += dg[1] - dg[0];
                nc[0] += dc[1] - dc[0];

                /* symbols leaving the left window */
                dg[1] = sCount(seq->seq, pos - step + len - width, pos + len - width, len, 'G');
                dc[1] = sCount(seq->seq, pos - step + len - width, pos + len - width, len, 'C');
                ng[1] += dg[0] - dg[1];
                nc[1] += dc[0] - dc[1];
            }

            skew =   ((float) (ng[0] - nc[0]) / (float) (ng[0] + nc[0]))
                   - ((float) (ng[1] - nc[1]) / (float) (ng[1] + nc[1]));

            printf("%d %f\n", pos, skew);
        }
    }

    FreeFastaSequence(seq);
    exit(0);
    /*NOTREACHED*/
}

View File

@ -0,0 +1,196 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_tab_aa.c */
/* @desc: tabulate aminoacid usage */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "Genetic.h"
#include "libbio.h"
#include "libfasta.h"
#ifndef Max
#define Max(i, j) ((i) > (j) ? (i) : (j))
#endif
#define NB_AA 21
#define X_AA (NB_AA-1)
static char sAA[] = "ACDEFGHIKLMNPQRSTVWY";
/* ----------------------------------------------- */
/* return the position of the first occurrence of  */
/* c in s, or -1 when c does not occur; as with    */
/* strchr, c == 0 matches the terminator of s      */
static int sCharIndex(char *s, int c)
{
    char *hit = strchr(s, c);

    if (hit == NULL)
        return -1;

    return (int) (hit - s);
}
/* ----------------------------------------------- */
/* sum t[i] for i in [0, imax), leaving out the     */
/* entries whose symbol s[i] occurs in ignore (a    */
/* null ignore pointer leaves nothing out);         */
/* the result is never less than 1                  */
static int sSum(int *t, char *s, int imax, char *ignore)
{
    int i = 0;
    int sum = 0;

    while (i < imax) {
        if ((ignore == 0) || (sCharIndex(ignore, s[i]) == -1))
            sum += t[i];
        i++;
    }

    return Max(sum, 1);
}
/* ----------------------------------------------- */
/* tab_aa: print one tab separated line of          */
/* amino-acid counts (or relative frequencies) per  */
/* fasta sequence read on stdin, preceded by a      */
/* header line, and optionally followed by a total  */
/* line                                             */
/* exit status : 0 ok, 5 bad option value,          */
/*               6 unknown option                   */
int main(int argn, char *argv[])
{
    int i, k, imin, imax, nbseq, nbaa, opt;
    int f_flag, t_flag, r_flag, x_flag;
    int count[NB_AA], tot[NB_AA];
    FastaSequence *seq;
    char ignore[256];

    extern char *optarg;    /* externs for getopts (3C) */

    f_flag = 0;             /* consider first residue */
    t_flag = 0;             /* no total */
    r_flag = 0;             /* use counts not rel. freq. */
    x_flag = 0;             /* no unknown symbols */
    *ignore = '\000';

    while ((opt = getopt(argn, argv, "1hi:rtx")) != -1) {
        switch (opt) {
        case '1':
            f_flag = 1;
            break;
        case 'h':
            (void) printf("tabulate amino-acid usage\n");
            (void) printf("usage: tab_aa [-h] [-1] [-i alpha] [-r] [-t] [-x]\n");
            (void) printf(" -1 : ignore first residue\n");
            (void) printf(" -i alpha : ignore symbols in alpha\n");
            (void) printf(" -r : compute relative frequencies\n");
            (void) printf(" -t : print last total line\n");
            (void) printf(" -x : ignore unknown symbol\n");
            exit(0);
            break;
        case 'i':
            /* the argument is copied into a fixed size buffer */
            if (strlen(optarg) >= sizeof(ignore)) {
                (void) fprintf(stderr, "tab_aa: -i value too long\n");
                exit(5);
            }
            (void) strcpy(ignore, optarg);
            break;
        case 'r':
            r_flag = 1;
            break;
        case 't':
            t_flag = 1;
            break;
        case 'x':
            x_flag = 1;
            break;
        case '?':
            (void) printf("usage: tab_aa [-h] [-1] [-i alpha] [-r] [-t] [-x]\n");
            exit(6);
            break;
        }
    }

    seq = NewFastaSequence();
    nbseq = 0;

    imin = (f_flag ? 1 : 0);
    /* NOTE(review): with -x, imax reaches X_AA where sAA[] holds its    */
    /* terminator; sCharIndex() always finds a terminator in ignore, so  */
    /* that column is never printed nor summed - confirm it is intended  */
    imax = (x_flag ? NB_AA : NB_AA - 1);

    for (i = 0 ; i < NB_AA ; i++)
        tot[i] = 0;

    /* header line */
    printf("name");
    for (i = 0 ; i < imax ; i++) {
        if (sCharIndex(ignore, sAA [i]) == -1)
            printf("\t%c", sAA[i]);
    }
    printf("\n");

    while (ReadFastaSequence(stdin, seq)) {
        nbseq++;
        if (! seq->ok)
            (void) printf("error at seq # %d\n", nbseq);

        for (i = 0 ; i < NB_AA ; i++)
            count[i] = 0;

        for (i = imin ; i < seq->length ; i++) {
            k = sCharIndex(sAA, seq->seq[i]);
            if (k >= 0)
                count[k]++;
            else if (x_flag)
                count[X_AA]++;      /* unknown symbols share one counter */
            else
                fprintf(stderr, "Invalid symbol %c in sequence %s\n",
                        seq->seq[i], seq->name);
        }

        /* denominator of the relative frequencies (at least 1) */
        nbaa = (r_flag ? sSum(count, sAA, imax, ignore) : 0);

        printf("%s", seq->name);
        for (i = 0 ; i < imax ; i++) {
            if (sCharIndex(ignore, sAA[i]) == -1) {
                if (r_flag)
                    printf("\t%.1f", 100. * (float) count[i] / (float) nbaa);
                else
                    printf("\t%d", count[i]);
            }
        }
        printf("\n");

        if (t_flag) {
            for (i = 0 ; i < NB_AA ; i++)
                tot[i] += count[i];
        }
    }

    if (t_flag) {
        printf("total:");
        nbaa = (r_flag ? sSum(tot, sAA, imax, ignore) : 0);
        for (i = 0 ; i < imax ; i++) {
            if (sCharIndex(ignore, sAA [i]) == -1) {
                if (r_flag)
                    printf("\t%.1f", 100. * (float) tot[i] / (float) nbaa);
                else
                    printf("\t%d", tot[i]);
            }
        }
        printf("\n");
    }

    FreeFastaSequence(seq);
    exit(0);
    /*NOTREACHED*/
}

View File

@ -0,0 +1,685 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_tab_codon.c */
/* @desc: tabulate codon usage */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "Genetic.h"
#include "libbio.h"
#include "libfasta.h"
#ifndef Max
#define Max(i, j) ((i) > (j) ? (i) : (j))
#endif
#define NB_CODONS 65
#define X_CODON (NB_CODONS-1)
#define NB_AA 21
#define X_AA (NB_AA-1)
#define COUNT_MODE 0 /* comptage absolu */
#define FREQU_MODE 1 /* freq. relative */
#define SYNON_MODE 2 /* freq. rel. sur synonymes */
#define MIN_STORAGE 100
#define USE_HASH 1 /* quicker version */
/* ! see listing ! */
/* fixed size sequence name; sCopyStorage() truncates longer names */
typedef char NameString[16];
/* codon counts of one sequence, as saved by sCopyStorage() */
typedef struct s_Storage {
NameString name;        /* sequence name, NUL terminated */
int counts[NB_CODONS];  /* one counter per codon slot */
} Storage;
static char sDna[] = "ACTG";
static char sAA[] = "ACDEFGHIKLMNPQRSTVWY*";
/* ----------------------------------------------- */
/* convert in place the lowercase letters of s to  */
/* uppercase; returns s                            */
/* characters go through unsigned char because the */
/* <ctype.h> functions are undefined for negative  */
/* values other than EOF                           */
static char *sUpper(char *s)
{
    char *c;

    for (c = s ; *c ; c++) {
        unsigned char uc = (unsigned char) *c;

        if (islower(uc))
            *c = (char) toupper(uc);
    }

    return s;
}
/* ----------------------------------------------- */
/* uppercase s in place with sUpper(), then turn    */
/* every 'U' into 'T'; returns s                    */
static char *sUpperDna(char *s)
{
    char *cur = sUpper(s);

    while (*cur != '\000') {
        if (*cur == 'U')
            *cur = 'T';
        cur++;
    }

    return s;
}
/* ----------------------------------------------- */
/* uppercase s in place with sUpper(), then turn    */
/* every '#' into '*'; returns s                    */
static char *sUpperProt(char *s)
{
    char *cur = sUpper(s);

    while (*cur != '\000') {
        if (*cur == '#')
            *cur = '*';
        cur++;
    }

    return s;
}
/* ----------------------------------------------- */
/* fill buffer with the 64 triplets over sDna, each */
/* followed by '/', in the order given by sDna      */
/* (first position varying slowest)                 */
/* the table is NUL terminated since it is searched */
/* with string functions; buffer must hold at least */
/* 4 * 64 + 1 bytes                                 */
static void sMakeStdCodons(char *buffer)
{
    int i, j, k;

    for (i = 0 ; i < 4 ; i++)
        for (j = 0 ; j < 4 ; j++)
            for (k = 0 ; k < 4 ; k++) {
                *buffer++ = sDna[i];
                *buffer++ = sDna[j];
                *buffer++ = sDna[k];
                *buffer++ = '/';
            }

    *buffer = '\000';
}
/* ----------------------------------------------- */
/* write into buffer, as "XXX/" entries followed by */
/* a NUL, the codons of the table codons whose      */
/* translation (with genetic code `code`) is one of */
/* the symbols of aa                                */
/* each codon is written at most once, whatever the */
/* number of symbols in aa, so the result always    */
/* fits in 4 * X_CODON + 1 bytes                    */
static void sMakeAaCodons(char *buffer, char *aa, char *codons, int code)
{
    int i, k;
    char *c;

    /* a single pass over the table is enough: strchr() already */
    /* tests the translation against every symbol of aa         */
    if (*aa) {
        for (i = 0, c = codons ; i < X_CODON ; i++, c += 4) {
            k = bio_codon_translate(c, code);
            if (strchr(aa, k)) {
                strncpy(buffer, c, 3);
                buffer[3] = '/';
                buffer += 4;
            }
        }
    }

    *buffer = '\000';
}
/* ----------------------------------------------- */
/* return the rank of the triplet starting at c in  */
/* the table s (made of "XXX/" entries), or -1 when */
/* c holds less than 3 characters or is not found   */
/* c[3] is overwritten during the search and then   */
/* restored, so c must be writable                  */
static int sCodonIndex(char *s, char *c)
{
    char saved;
    char *hit;

    if ((! c[0]) || (! c[1]) || (! c[2]))
        return -1;

    /* isolate the triplet for strstr() */
    saved = c[3];
    c[3] = '\000';
    hit = strstr(s, c);
    c[3] = saved;

    if (hit == NULL)
        return -1;

    return (int) (hit - s) / 4;
}
#if USE_HASH
/* ----------------------------------------------- */
/* this is a quicker version of sCodonIndex        */
/* ! only works if alpha == sDna                   */
/* return the rank of the triplet starting at c,   */
/* computed as a base 4 number from the positions  */
/* of its symbols in alpha, or -1 when a symbol is */
/* not in alpha or when the triplet is cut short   */
/* by the end of the string                        */
static int sCodonHash(char *alpha, char *c)
{
    int i, h;
    char *p;

    for (i = h = 0 ; i < 3 ; i++) {
        /* strchr() would match a NUL with the terminator of alpha */
        if ((c[i] == '\000') || (! (p = strchr(alpha, c[i]))))
            return -1;
        h = (h << 2) | (int) (p - alpha);
    }

    return h;
}
#endif
/* ----------------------------------------------- */
/* translate the codon c with genetic code `code`   */
/* and return the position of the result in saa,    */
/* or X_AA when saa does not hold it                */
static int sAaIndex(char *saa, char *c, int code)
{
    int symbol = bio_codon_translate(c, code);
    char *hit = strchr(saa, symbol);

    if (hit == 0)
        return X_AA;

    return (int) (hit - saa);
}
/* ----------------------------------------------- */
/* return the sum of the first X_CODON entries of   */
/* counts, or `zero` when that sum is 0             */
/* (codons is not used by the computation)          */
static int sSumCounts(char *codons, int *counts, int zero)
{
    int idx;
    int total = 0;

    for (idx = 0 ; idx < X_CODON ; idx++)
        total += counts[idx];

    if (total == 0)
        return zero;

    return total;
}
/* ----------------------------------------------- */
/* sum the codon counts per amino-acid: naa[k] gets */
/* the total of counts[] over the codons that       */
/* sAaIndex() maps to k; totals equal to 0 are then */
/* replaced by `zero`                               */
static void sSumAA(char *codons, char *aa, int code, int *counts, int *naa, int zero)
{
    int slot, cod;
    char *trip = codons;

    for (slot = 0 ; slot < NB_AA ; slot++)
        naa[slot] = 0;

    for (cod = 0 ; cod < X_CODON ; cod++) {
        slot = sAaIndex(aa, trip, code);
        naa[slot] += counts[cod];
        trip += 4;              /* next "XXX/" entry */
    }

    for (slot = 0 ; slot < NB_AA ; slot++) {
        if (naa[slot] == 0)
            naa[slot] = zero;
    }
}
/* ----------------------------------------------- */
/* pretty print: for each symbol of sAA print the   */
/* symbol on a line, then one line per codon        */
/* translated to it, with the share of that codon   */
/* among its synonyms and its count taken from tot  */
static void sPrintCodonUsage(int *tot, char *codons, int code)
{
    char symbol[2];
    char synonyms[4*NB_CODONS+1];
    char *aa, *entry;
    int idx, family;

    symbol[1] = '\000';

    aa = sAA;
    while (*aa != '\000') {
        symbol[0] = *aa;
        sMakeAaCodons(synonyms, symbol, codons, code);
        printf ("%1s\n", symbol);

        /* total count of the synonymous codons (at least 1) */
        family = 0;
        for (entry = synonyms ; *entry ; entry += 4) {
            idx = sCodonIndex(codons, entry);
            family += tot[idx];
        }
        if (family == 0)
            family = 1;

        for (entry = synonyms ; *entry ; entry += 4) {
            idx = sCodonIndex(codons, entry);
            printf(" %3.3s\t%.2f %d\n", entry, (float) tot[idx]/ (float) family,
                   tot[idx]);
        }

        aa++;
    }
}
/* ----------------------------------------------- */
/* enlarge the array store to twice *size entries   */
/* (MIN_STORAGE at least) and return its new        */
/* address; store may be null                       */
/* on success *size is updated; on failure null is  */
/* returned and *size is left unchanged             */
static Storage *sIncreaseStorage(Storage *store, int *size)
{
    int wanted = Max(*size * 2, MIN_STORAGE);
    Storage *grown;

    if (! store)
        grown = (Storage *) malloc(wanted * sizeof(Storage));
    else
        grown = (Storage *) realloc(store, wanted * sizeof(Storage));

    if (grown)
        *size = wanted;

    return grown;
}
/* ----------------------------------------------- */
/* save the NB_CODONS entries of counts and the     */
/* sequence name into *store; the name is truncated */
/* to fit a NameString and always NUL terminated    */
static void sCopyStorage(Storage *store, int *counts, char *name)
{
    int idx = 0;

    while (idx < NB_CODONS) {
        store->counts[idx] = counts[idx];
        idx++;
    }

    (void) strncpy(store->name, name, sizeof(NameString));
    store->name[sizeof(NameString)-1] = '\000';
}
/* ----------------------------------------------- */
main(argn, argv)
int argn;
char *argv[];
{
int i, j, k, imin, imax, nbseq, opt, code, sum, npart, total;
int sa_flag, so_flag, r_flag, t_flag, p_flag, g_flag, a_flag;
int counts[NB_CODONS], totcd[NB_CODONS], corcd[NB_CODONS],
naa[NB_AA], totaa[NB_AA];
Storage *partial;
FastaSequence *seq;
char *trip;
char codons[4*NB_CODONS+1],
aas_ignore[256],
usr_ignore[4*NB_CODONS+1];
extern char *optarg; /* externs for getopts (3C) */
code = 0; /* universal genetic code */
sa_flag = 0; /* consider first codon */
so_flag = 0; /* consider last codon */
t_flag = 0; /* no total */
r_flag = COUNT_MODE; /* compute counts */
p_flag = 0; /* no pretty print */
g_flag = 0; /* no global correction */
a_flag = 0; /* no aa names */
*aas_ignore = '\000';
*usr_ignore = '\000';
npart = 0;
partial = NULL;
sMakeStdCodons(codons);
/* ---------------------------- */
/* parse arguments */
/* ---------------------------- */
while ((opt = getopt(argn, argv, "ac:hgi:I:rRsStp")) != -1) {
switch (opt) {
case 'a':
a_flag = 1;
break;
case 'c':
if ( (sscanf(optarg, "%d", &code) != 1)
|| (code < 0) || (code > 8)) {
(void) printf("bad code value: -c (0-8)\n");
exit(5);
}
break;
case 'h':
(void) printf("tabulate codon usage (CU)\n");
(void) printf("usage: tab_codon [-a] [-c code] [-i|I alpha]\n");
(void) printf(" [-r|R] [-s] [-S] [-t] [-p]\n");
(void) printf(" -a : add the amino-acid symbol in axis name\n");
(void) printf(" -c code\n");
(void) printf(" 0 : universal\n");
(void) printf(" 1 : mito yeast\n");
(void) printf(" 2 : mito vertebrate\n");
(void) printf(" 3 : filamentous fungi\n");
(void) printf(" 4 : mito insects & platyhelminthes\n");
(void) printf(" 5 : Candida cylindracea\n");
(void) printf(" 6 : Ciliata\n");
(void) printf(" 7 : Euplotes\n");
(void) printf(" 8 : mito echinoderms\n");
(void) printf(" -i alpha : ignore codons in alpha\n");
(void) printf(" alpha has form: 'ATG/GTG/CTG'\n");
(void) printf(" -I alpha : ignore codons whose aa are in alpha\n");
(void) printf(" alpha has form: 'FYW'\n");
(void) printf(" '#' means stop\n");
(void) printf(" -r : compute relative frequencies (rCU)\n");
(void) printf(" -R : compute synonymous relative frequencies (rSCU)\n");
(void) printf(" note: by default (no -r nor -R) counts (CU) are printed\n");
(void) printf(" -g : apply various global count corrections :\n");
(void) printf(" for CU : global aa usage correction -> SCU\n");
(void) printf(" for rCU : global aa usage correction -> SrCU\n");
(void) printf(" for rSCU : global codon usage correction ->CrSCU\n");
(void) printf(" -s : ignore first (start) codon\n");
(void) printf(" -S : ignore last (stop) codon\n");
(void) printf(" -p : pretty print codon usage\n");
(void) printf(" -r -R -g options are then ignored\n");
(void) printf(" -t : print last total line\n");
exit(0);
break;
case 'g':
g_flag = 1;
break;
case 'i':
(void) strcpy(usr_ignore, optarg);
(void) sUpperDna(usr_ignore);
break;
case 'I':
(void) strcpy(aas_ignore, optarg);
(void) sUpperProt(aas_ignore);
break;
case 't':
t_flag = 1;
break;
case 'r':
r_flag = FREQU_MODE;
break;
case 'R':
r_flag = SYNON_MODE;
break;
case 's':
sa_flag = 1;
break;
case 'S':
so_flag = 1;
break;
case 'p':
p_flag = 1;
break;
case '?':
(void) printf("usage: tab_codon [-h] [-c code] [-i|I alpha]\n");
(void) printf(" [-r|R] [-s] [-S] [-t] [-p] [-a] [-b]\n");
exit(6);
break;
}
}
/* ---------------------------- */
/* check usage */
/* ---------------------------- */
if (*aas_ignore && *usr_ignore) {
fprintf(stderr,"tab_codon: -i and -I incompatible options\n");
exit(5);
}
if (*aas_ignore)
sMakeAaCodons(usr_ignore, aas_ignore, codons, code);
seq = NewFastaSequence();
nbseq = 0;
for (i = 0 ; i < NB_CODONS ; i++)
totcd[i] = 0;
if (! p_flag) {
printf("name");
for (i = 0, trip = codons ; i < X_CODON ; i++, trip += 4) {
if (sCodonIndex(usr_ignore, trip) == -1) {
printf("\t%3.3s", trip);
if (a_flag) {
k = sAaIndex(sAA, trip, code);
printf("/%1c", sAA[k]);
}
}
}
printf("\n");
}
/* ---------------------------- */
/* loop on sequences */
/* ---------------------------- */
while (ReadFastaSequence(stdin, seq)) {
nbseq++;
if (! seq->ok)
(void) printf("error at seq # %d\n", nbseq);
/* -------------------------------- */
/* compute counts */
/* -------------------------------- */
for (i = 0 ; i < NB_CODONS ; i++)
counts[i] = 0;
imin = (sa_flag ? 3 : 0);
imax = seq->length - (so_flag ? 3 : 0);
k = -1;
for (i = imin ; i < imax ; i += 3) {
#if USE_HASH
k = sCodonHash(sDna, seq->seq + i);
#else
k = sCodonIndex(codons, seq->seq + i);
#endif
if (k >= 0)
counts[k]++;
else
fprintf(stderr, "invalid codon %3.3s at position %d in sequence %s\n",
seq->seq + i, i+1, seq->name);
}
/* remove ignored codons */
for (i = 0, trip = codons ; i < X_CODON ; i++, trip += 4) {
if (sCodonIndex(usr_ignore, trip) >= 0)
counts[i] = 0;
}
/* -------------------------------- */
/* compute totals */
/* -------------------------------- */
for (i = 0 ; i < NB_CODONS ; i++)
totcd[i] += counts[i];
/* -------------------------------- */
/* store or print local values */
/* -------------------------------- */
if (! p_flag) {
/* ------------ */
if (g_flag) { /* store */
/* ------------ */
if (npart <= nbseq) {
partial = sIncreaseStorage(partial, &npart);
if (! partial) {
fprintf(stderr,"not enough memory for %d sequences\n", nbseq);
exit(10);
}
}
sCopyStorage(partial + nbseq - 1, counts, seq->name);
}
/* ------------ */
else { /* printout */
/* ------------ */
sum = sSumCounts(codons, counts, 1);
if (r_flag == SYNON_MODE)
sSumAA(codons, sAA, code, counts, naa, 1);
printf("%s", seq->name);
for (i = 0, trip = codons ; i < X_CODON ; i++, trip += 4) {
if (sCodonIndex(usr_ignore, trip) == -1) {
if (r_flag == COUNT_MODE)
printf("\t%d", counts[i]);
else if (r_flag == FREQU_MODE)
printf("\t%.2f", 100. * (float) counts[i] / (float) sum);
else if (r_flag == SYNON_MODE) {
k = sAaIndex(sAA, trip, code);
printf("\t%.1f", 100. * (float) counts[i] / (float) naa[k]);
}
}
}
printf("\n");
}
}
}
/* ---------------------------- */
/* end of read loop */
/* ---------------------------- */
/* ---------------------------- */
/* global correction */
/* ---------------------------- */
if (g_flag && (! p_flag)) {
/* ------------------------ */
/* compute corrected counts */
/* ------------------------ */
total = sSumCounts(codons, totcd, 1);
sSumAA(codons, sAA, code, totcd, totaa, 1);
if ((r_flag == COUNT_MODE) || (r_flag == FREQU_MODE)) {
sSumAA(codons, sAA, code, totcd, totaa, 0);
for (i = 0 ; i < X_CODON ; i++)
corcd[i] = 0;
for (j = 0 ; j < nbseq ; j++) {
sum = sSumCounts(codons, partial[j].counts, 0);
sSumAA(codons, sAA, code, partial[j].counts, naa, 1);
for (i = 0, trip = codons ; i < X_CODON ; i++, trip += 4) {
k = sAaIndex(sAA, trip, code);
partial[j].counts[i] = (int) floor(
(double)partial[j].counts[i] * (double) totaa[k]
* (double) sum / (double) total / (double) naa[k]
+ 0.5);
corcd[i] += partial[j].counts[i];
}
}
}
else {
for (i = 0 ; i < X_CODON ; i++)
corcd[i] = totcd[i];
}
/* ------------------------ */
/* printout sequences */
/* ------------------------ */
for (j = 0 ; j < nbseq ; j++) {
sum = sSumCounts(codons, partial[j].counts, 1);
if (r_flag == SYNON_MODE)
sSumAA(codons, sAA, code, partial[j].counts, naa, 0);
printf("%s", partial[j].name);
for (i = 0, trip = codons ; i < X_CODON ; i++, trip += 4) {
if (sCodonIndex(usr_ignore, trip) == -1) {
if (r_flag == COUNT_MODE)
printf("\t%d", partial[j].counts[i]);
else if (r_flag == FREQU_MODE)
printf("\t%.2f", 100. * (float) partial[j].counts[i] / (float) sum);
else if (r_flag == SYNON_MODE) {
k = sAaIndex(sAA, trip, code);
if (naa[k] != 0)
printf("\t%.1f", 100. * (float) partial[j].counts[i] / (float) naa[k]);
else
printf("\t%.1f", 100. * (float) totcd[i] / (float) totaa[k]);
}
}
}
printf("\n");
}
total = sSumCounts(codons, corcd, 1);
for (i = 0 ; i < X_CODON ; i++)
totcd[i] = corcd[i];
sSumAA(codons, sAA, code, totcd, totaa, 1);
}
/* ---------------------------- */
/* print grand total */
/* ---------------------------- */
if (t_flag && (! p_flag)) {
printf("total:");
sum = sSumCounts(codons, totcd, 1);
for (i = 0, trip = codons ; i < X_CODON ; i++, trip += 4) {
if (sCodonIndex(usr_ignore, trip) == -1) {
if (r_flag == COUNT_MODE)
printf("\t%d", totcd[i]);
else if (r_flag == FREQU_MODE)
printf("\t%.2f", 100. * (float) totcd[i] / (float) sum);
else if (r_flag == SYNON_MODE) {
k = sAaIndex(sAA, trip, code);
printf("\t%.1f", 100. * (float) totcd[i] / (float) totaa[k]);
}
}
}
printf("\n");
}
/* ---------------------------- */
/* pretty print */
/* ---------------------------- */
if (p_flag) {
sPrintCodonUsage(totcd, codons, code);
}
/* ---------------------------- */
/* free memory */
/* ---------------------------- */
#if 1
if (partial)
free(partial);
FreeFastaSequence(seq);
#endif
exit(0);
/*NOTREACHED*/
}

View File

@ -0,0 +1,289 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_tab_nuc.c */
/* @desc: tabulate dinucleotide usage */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "Genetic.h"
#include "libbio.h"
#include "libfasta.h"
#ifndef Max
#define Max(i, j) ((i) > (j) ? (i) : (j))
#endif
#define NB_DINUC 16
static char sDna[] = "ACTG";
/* ----------------------------------------------- */
/* Return the 0-based index of the first occurrence of character c   */
/* in string s, or -1 when c does not occur in s.                    */
/* strchr() considers the terminating NUL to be part of the string,  */
/* so a NUL character is rejected explicitly; otherwise it would be  */
/* reported as found at index strlen(s).                             */
static int sCharIndex(const char *s, int c)
{
    const char *hit;

    if ((char) c == '\0')
        return -1;

    hit = strchr(s, c);
    return (hit ? (int) (hit - s) : -1);
}
/* ----------------------------------------------- */
/* Convert string s to upper case in place and return s.             */
/* Each character is converted to unsigned char before being handed  */
/* to the <ctype.h> functions: passing a negative plain char value   */
/* (other than EOF) to islower()/toupper() is undefined behavior.    */
static char *sUpper(char *s)
{
    char *c;

    for (c = s ; *c ; c++) {
        unsigned char uc = (unsigned char) *c;

        if (islower(uc))
            *c = (char) toupper(uc);
    }
    return s;
}
/* ----------------------------------------------- */
/* Normalize a nucleic sequence in place: upper-case it through      */
/* sUpper(), then rewrite every 'U' as 'T'. Returns s.               */
static char *sUpperDna(char *s)
{
    char *cur = sUpper(s);

    while (*cur != '\0') {
        if (*cur == 'U')
            *cur = 'T';
        cur++;
    }
    return s;
}
/* ----------------------------------------------- */
/* Hash the dinucleotide c[0]c[1] over alphabet alpha.               */
/* Each symbol contributes its index in alpha on 2 bits, first       */
/* symbol in the high bits, so a 4-letter alphabet yields 0..15.     */
/* Returns -1 when either symbol is not in alpha.                    */
/* A NUL symbol (end of sequence) is rejected explicitly: strchr()   */
/* would match the terminator of alpha and produce index 4, which    */
/* corrupts the hash; stopping at a NUL in c[0] also avoids reading  */
/* c[1] past the end of the string.                                  */
static int sDiHash(const char *alpha, const char *c)
{
    int i, h = 0;
    const char *p;

    for (i = 0 ; i < 2 ; i++) {
        if (c[i] == '\0')
            return -1;
        if (! (p = strchr(alpha, c[i])))
            return -1;
        h = (h << 2) | (int) (p - alpha);
    }
    return h;
}
/* ----------------------------------------------- */
/* Rolling update of a dinucleotide hash.                            */
/*   oldh  : hash previously returned for the dinucleotide starting  */
/*           one position before c, or a negative value if none      */
/*   alpha : alphabet                                                */
/*   c     : pointer to the first symbol of the new dinucleotide     */
/* When oldh is negative the hash is recomputed from scratch with    */
/* sDiHash(). Otherwise the low 2 bits of oldh (the previous second  */
/* symbol) become the new first symbol and only c[1] is looked up.   */
/* Returns the new hash, or -1 if the looked-up symbol is invalid.   */
/* A NUL in c[1] is rejected explicitly because strchr() would       */
/* otherwise match the terminator of alpha and yield index 4.        */
static int sUpdateDiHash(int oldh, char *alpha, char *c)
{
    char *p;

    if (oldh < 0)
        return sDiHash(alpha, c);

    if (c[1] == '\0')
        return -1;
    if (! (p = strchr(alpha, c[1])))
        return -1;

    return (((oldh << 2) & 0xf) | (int) (p - alpha));
}
/* ----------------------------------------------- */
/* Fill buffer with the 16 dinucleotides over sDna, in hash order,   */
/* each written as two symbols followed by a '/' separator           */
/* ("AA/AC/AT/...").                                                 */
/* buffer must provide room for 3*16 characters plus the terminating */
/* NUL, which is now written so that the result is a valid C string. */
static void sMakeStdDinuc(char *buffer)
{
    int i, j;

    for (i = 0 ; i < 4 ; i++) {
        for (j = 0 ; j < 4 ; j++) {
            *buffer++ = sDna[i];
            *buffer++ = sDna[j];
            *buffer++ = '/';
        }
    }
    *buffer = '\000';
}
/* ----------------------------------------------- */
/* Tell whether a position of phase `phase` (0, 1 or 2) is selected  */
/* by `flag`. `flag` lists the selected positions as decimal digits  */
/* in increasing order: 1, 2, 3, 12, 13, 23 or 123. Any other flag   */
/* value selects nothing; 123 selects everything.                    */
static int sInPhase(int phase, int flag)
{
    int want0, want1, want2;

    if (flag == 123)
        return 1;

    want0 = (flag == 1) || (flag == 12) || (flag == 13);
    want1 = (flag == 2) || (flag == 12) || (flag == 23);
    want2 = (flag == 3) || (flag == 13) || (flag == 23);

    return (want0 && (phase == 0))
        || (want1 && (phase == 1))
        || (want2 && (phase == 2));
}
/* ----------------------------------------------- */
/* Tabulate dinucleotide usage of the FASTA sequences read on stdin. */
/* One tab-separated line is printed per sequence, preceded by a     */
/* header line naming the 16 dinucleotides.                          */
/* Options:                                                          */
/*   -h      print help and exit(0)                                  */
/*   -r      print relative frequencies instead of counts            */
/*   -s      skip the first 3 symbols (start codon)                  */
/*   -S      skip the last 3 symbols (stop codon)                    */
/*   -t      print a final "total:" line                             */
/*   -p val  restrict counting to dinucleotides starting at the      */
/*           given codon positions (1, 2, 3, 12, 13, 23 or 123)      */
/* Exit status: 0 on success, 5 on bad -p value, 6 on bad option.    */
/* NOTE(review): help text names the program "tab_nuc"; confirm the  */
/* installed binary name.                                            */
int main(int argn, char *argv[])
{
    int i, j, k, imin, imax, sum, nbseq, opt;
    int sa_flag, so_flag, r_flag, t_flag, p_flag;
    int count[NB_DINUC], tot[NB_DINUC];
    FastaSequence *seq;
    char *din, dinuc[3*NB_DINUC+1];
    extern char *optarg;    /* externs for getopts (3C) */

    sa_flag = 0;            /* consider first codon */
    so_flag = 0;            /* consider last codon  */
    t_flag = 0;             /* no total             */
    r_flag = 0;             /* compute counts       */
    p_flag = 123;           /* 3 phases             */

    sMakeStdDinuc(dinuc);

    while ((opt = getopt(argn, argv, "hrsStp:")) != -1) {
        switch (opt) {
        case 'h':
            (void) printf("tabulate dinucleotide usage\n");
            (void) printf("usage: tab_nuc [-h] [-r] [-s] [-S] [-t] [-p [123]]\n");
            (void) printf(" -p [1][2][3] : count only dinucleotides starting at these codon positions\n");
            (void) printf(" -r : compute relative frequencies\n");
            (void) printf(" -s : ignore first (start) codon\n");
            (void) printf(" -S : ignore last (stop) codon\n");
            (void) printf(" -t : print last total line\n");
            exit(0);
            break;
        case 't':
            t_flag = 1;
            break;
        case 'r':
            r_flag = 1;
            break;
        case 's':
            sa_flag = 1;
            break;
        case 'S':
            so_flag = 1;
            break;
        case 'p':
            if (   (sscanf(optarg, "%d", &p_flag) != 1)
                || (   (p_flag != 1) && (p_flag != 2) && (p_flag != 3)
                    && (p_flag != 12) && (p_flag != 13) && (p_flag != 23)
                    && (p_flag != 123))) {
                (void) printf("bad phase value: -p [1][2][3]\n");
                exit(5);
            }
            break;
        case '?':
            (void) printf("usage: tab_nuc [-h] [-r] [-s] [-S] [-t] [-p [123]]\n");
            exit(6);
            break;
        }
    }

    seq = NewFastaSequence();
    nbseq = 0;

    for (i = 0 ; i < NB_DINUC ; i++)
        tot[i] = 0;

    /* header line: the precision bounds each read to 2 symbols */
    printf("name");
    for (i = 0, din = dinuc ; i < NB_DINUC ; i++, din += 3) {
        printf("\t%2.2s", din);
    }
    printf("\n");

    while (ReadFastaSequence(stdin, seq)) {

        nbseq++;

        /* NOTE(review): a sequence flagged not ok is reported but */
        /* still tabulated below - confirm this is intended        */
        if (! seq->ok)
            (void) printf("error at seq # %d\n", nbseq);

        for (i = 0 ; i < NB_DINUC ; i++)
            count[i] = 0;

        imin = (sa_flag ? 3 : 0);
        /* -1 : a dinucleotide needs one symbol after position i */
        imax = seq->length - (so_flag ? 3 : 0) - 1;

        /* j : rolling hash (-1 forces a full recompute)          */
        /* k : codon phase of position i, relative to imin        */
        for (i = imin, j = -1, k = 0 ; i < imax ; i++) {
            j = sUpdateDiHash(j, sDna, seq->seq + i);
            if (sInPhase(k, p_flag)) {
                if (j >= 0)
                    count[j]++;
                else
                    fprintf(stderr, "invalid dinucleotide %2.2s at position %d in sequence %s\n",
                            seq->seq + i, i+1, seq->name);
            }
            k = (k+1) % 3;
        }

        sum = 0;
        if (r_flag) {
            for (i = 0 ; i < NB_DINUC ; i++)
                sum += count[i];
            sum = Max(sum, 1);      /* avoid division by zero */
        }

        printf("%s", seq->name);
        for (i = 0 ; i < NB_DINUC ; i++) {
            if (r_flag)
                printf("\t%.2f", (float) count[i] / (float) sum);
            else
                printf("\t%d", count[i]);
        }
        printf("\n");

        if (t_flag) {
            for (i = 0 ; i < NB_DINUC ; i++)
                tot[i] += count[i];
        }
    }

    if (t_flag) {
        printf("total:");
        sum = 0;
        if (r_flag) {
            for (i = 0 ; i < NB_DINUC ; i++)
                sum += tot[i];
            sum = Max(sum, 1);      /* avoid division by zero */
        }
        for (i = 0 ; i < NB_DINUC ; i++) {
            if (r_flag)
                printf("\t%.2f", (float) tot[i] / (float) sum);
            else
                printf("\t%d", tot[i]);
        }
        printf("\n");
    }

    FreeFastaSequence(seq);

    return 0;
}

View File

@ -0,0 +1,234 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_tab_nuc.c */
/* @desc: tabulate nucleotide usage */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "Genetic.h"
#include "libbio.h"
#include "libfasta.h"
#ifndef Max
#define Max(i, j) ((i) > (j) ? (i) : (j))
#endif
#define NB_NUC 4
static char sDna[] = "ACTG";
/* ----------------------------------------------- */
/* Return the 0-based index of the first occurrence of character c   */
/* in string s, or -1 when c does not occur in s.                    */
/* strchr() considers the terminating NUL to be part of the string,  */
/* so a NUL character is rejected explicitly; otherwise it would be  */
/* reported as found at index strlen(s), which is out of range for   */
/* any table indexed by symbol.                                      */
static int sCharIndex(const char *s, int c)
{
    const char *hit;

    if ((char) c == '\0')
        return -1;

    hit = strchr(s, c);
    return (hit ? (int) (hit - s) : -1);
}
/* ----------------------------------------------- */
/* Convert string s to upper case in place and return s.             */
/* Each character is converted to unsigned char before being handed  */
/* to the <ctype.h> functions: passing a negative plain char value   */
/* (other than EOF) to islower()/toupper() is undefined behavior.    */
static char *sUpper(char *s)
{
    char *c;

    for (c = s ; *c ; c++) {
        unsigned char uc = (unsigned char) *c;

        if (islower(uc))
            *c = (char) toupper(uc);
    }
    return s;
}
/* ----------------------------------------------- */
/* Upper-case a nucleic sequence in place via sUpper() and replace   */
/* each 'U' by 'T'. Returns s.                                       */
static char *sUpperDna(char *s)
{
    char *cur = sUpper(s);

    while (*cur != '\0') {
        if (*cur == 'U')
            *cur = 'T';
        cur++;
    }
    return s;
}
/* ----------------------------------------------- */
/* Tabulate nucleotide usage of the FASTA sequences read on stdin.   */
/* One tab-separated line is printed per sequence, preceded by a     */
/* header line naming the columns.                                   */
/* Options:                                                          */
/*   -h  print help and exit(0)                                      */
/*   -p  count separately for each of the 3 codon positions          */
/*   -r  print relative frequencies instead of counts                */
/*   -s  skip the first 3 symbols (start codon)                      */
/*   -S  skip the last 3 symbols (stop codon)                        */
/*   -t  print a final "total:" line                                 */
/* Exit status: 0 on success, 6 on bad option.                       */
int main(int argn, char *argv[])
{
    int i, j, k, imin, imax, kmax, nbseq, opt;
    int sa_flag, so_flag, r_flag, t_flag, p_flag;
    int count[3][NB_NUC], tot[3][NB_NUC], sum[3];
    FastaSequence *seq;

    sa_flag = 0;            /* consider first codon  */
    so_flag = 0;            /* consider last codon   */
    t_flag = 0;             /* no total              */
    r_flag = 0;             /* compute counts        */
    p_flag = 0;             /* no position specific  */

    while ((opt = getopt(argn, argv, "hrsStp")) != -1) {
        switch (opt) {
        case 'h':
            (void) printf("tabulate nucleotide usage\n");
            (void) printf("usage: tab_nuc [-h] [-r] [-s] [-S] [-t] [-p]\n");
            (void) printf(" -p : counts by positions\n");
            (void) printf(" -r : compute relative frequencies\n");
            (void) printf(" -s : ignore first (start) codon\n");
            (void) printf(" -S : ignore last (stop) codon\n");
            (void) printf(" -t : print last total line\n");
            exit(0);
            break;
        case 't':
            t_flag = 1;
            break;
        case 'r':
            r_flag = 1;
            break;
        case 's':
            sa_flag = 1;
            break;
        case 'S':
            so_flag = 1;
            break;
        case 'p':
            p_flag = 1;
            break;
        case '?':
            (void) printf("usage: tab_nuc [-h] [-r] [-s] [-S] [-t] [-p]\n");
            exit(6);
            break;
        }
    }

    seq = NewFastaSequence();
    nbseq = 0;

    /* a single row is used unless counting by codon position */
    kmax = (p_flag ? 3 : 1);

    for (k = 0 ; k < kmax ; k++)
        for (i = 0 ; i < NB_NUC ; i++)
            tot[k][i] = 0;

    printf("name");
    for (k = 0 ; k < kmax ; k++)
        for (i = 0 ; i < NB_NUC ; i++) {
            if (p_flag)
                printf("\t%c%1d", sDna[i], (k+1));
            else
                printf("\t%c", sDna[i]);
        }
    printf("\n");

    while (ReadFastaSequence(stdin, seq)) {

        nbseq++;

        /* NOTE(review): a sequence flagged not ok is reported but */
        /* still tabulated below - confirm this is intended        */
        if (! seq->ok)
            (void) printf("error at seq # %d\n", nbseq);

        for (k = 0 ; k < kmax ; k++)
            for (i = 0 ; i < NB_NUC ; i++)
                count[k][i] = 0;

        imin = (sa_flag ? 3 : 0);
        imax = seq->length - (so_flag ? 3 : 0);

        for (i = imin, k = 0 ; i < imax ; i++) {
            j = sCharIndex(sDna, seq->seq[i]);
            /* the upper bound protects count[][] should the lookup */
            /* ever report the terminator position (index NB_NUC)   */
            if ((j >= 0) && (j < NB_NUC))
                count[k][j]++;
            else
                fprintf(stderr, "invalid nucleotide %1.1s at position %d in sequence %s\n",
                        seq->seq + i, i+1, seq->name);
            k = (p_flag ? (k+1)%3 : 0);
        }

        for (k = 0 ; k < kmax ; k++)
            sum[k] = 0;

        if (r_flag) {
            for (k = 0 ; k < kmax ; k++)
                for (i = 0 ; i < NB_NUC ; i++)
                    sum[k] += count[k][i];
            for (k = 0 ; k < kmax ; k++)
                sum[k] = Max(sum[k], 1);    /* avoid division by zero */
        }

        printf("%s", seq->name);
        for (k = 0 ; k < kmax ; k++)
            for (i = 0 ; i < NB_NUC ; i++) {
                if (r_flag)
                    printf("\t%.2f", (float) count[k][i] / (float) sum[k]);
                else
                    printf("\t%d", count[k][i]);
            }
        printf("\n");

        if (t_flag) {
            for (k = 0 ; k < kmax ; k++)
                for (i = 0 ; i < NB_NUC ; i++)
                    tot[k][i] += count[k][i];
        }
    }

    if (t_flag) {
        printf("total:");
        for (k = 0 ; k < kmax ; k++)
            sum[k] = 0;
        if (r_flag) {
            for (k = 0 ; k < kmax ; k++)
                for (i = 0 ; i < NB_NUC ; i++)
                    sum[k] += tot[k][i];
            for (k = 0 ; k < kmax ; k++)
                sum[k] = Max(sum[k], 1);    /* avoid division by zero */
        }
        for (k = 0 ; k < kmax ; k++)
            for (i = 0 ; i < NB_NUC ; i++) {
                if (r_flag)
                    printf("\t%.2f", (float) tot[k][i] / (float) sum[k]);
                else
                    printf("\t%d", tot[k][i]);
            }
        printf("\n");
    }

    FreeFastaSequence(seq);

    return 0;
}

View File

@ -0,0 +1,144 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: util_translate.c */
/* @desc: translate a fasta nucleic sequence */
/* */
/* @history: */
/* @+ <Gloup> : Jan 96 : PWG version */
/* ---------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SGI
#include <getopt.h>
#endif
#include "libbio.h"
#include "libfasta.h"
/* ----------------------------------------------- */
/* safely copy overlapping strings */
/* Copy the NUL-terminated string s2 (terminator included) to s1.    */
/* The two areas may overlap in either direction: memmove() handles  */
/* a destination located after the source as well as before it,      */
/* whereas a forward byte-by-byte loop is only correct when s1       */
/* precedes s2. s1 must have room for strlen(s2) + 1 bytes.          */
static void sStrCpy(char *s1, const char *s2)
{
    /* length is taken before any byte is moved */
    size_t nbytes = strlen(s2) + 1;

    (void) memmove(s1, s2, nbytes);
}
/* ----------------------------------------------- */
/* Translate the FASTA nucleic sequences read on stdin and write the */
/* translated sequences on stdout.                                   */
/* Options:                                                          */
/*   -c code  genetic code number, 0 to 8 (default 0)                */
/*   -h       print help and exit(0)                                 */
/*   -s       drop the first translated symbol (start codon)         */
/*   -S       drop the last translated symbol (stop codon)           */
/* Exit status: 0 on success, 5 on bad -c value, 6 on bad option.    */
int main(int argn, char *argv[])
{
    int nbseq, minlen, opt, code;
    int sa_flag, so_flag;
    FastaSequence *seq;
    extern char *optarg;    /* externs for getopts (3C) */

    code = 0;
    sa_flag = so_flag = 0;

    while ((opt = getopt(argn, argv, "c:hsS")) != -1) {
        switch (opt) {
        case 'c':
            if (   (sscanf(optarg, "%d", &code) != 1)
                || (code < 0) || (code > 8)) {
                (void) printf("bad code value: -c (0-8)\n");
                exit(5);
            }
            break;
        case 'h':
            (void) printf("translate dna sequence[s] to protein[s]\n");
            (void) printf("usage: translate [-c (0-8)] [-s] [-S]\n");
            (void) printf(" -c code\n");
            (void) printf(" 0 : universal\n");
            (void) printf(" 1 : mito yeast\n");
            (void) printf(" 2 : mito vertebrate\n");
            (void) printf(" 3 : filamentous fungi\n");
            (void) printf(" 4 : mito insects & platyhelminthes\n");
            (void) printf(" 5 : Candida cylindracea\n");
            (void) printf(" 6 : Ciliata\n");
            (void) printf(" 7 : Euplotes\n");
            (void) printf(" 8 : mito echinoderms\n");
            (void) printf(" -s : ignore first (start) codon\n");
            (void) printf(" -S : ignore last (stop) codon\n");
            exit(0);
            break;
        case 's':
            sa_flag = 1;
            break;
        case 'S':
            so_flag = 1;
            break;
        case '?':
            (void) printf("usage: translate [-c (0-8)] [-s] [-S]\n");
            exit(6);
            break;
        }
    }

    seq = NewFastaSequence();
    nbseq = 0;

    while (ReadFastaSequence(stdin, seq)) {

        nbseq++;

        /* NOTE(review): both messages are printed for a sequence   */
        /* flagged not ok; the "bad length" one may be a leftover - */
        /* confirm before removing                                  */
        if (! seq->ok) {
            (void) printf("error at seq # %d\n", nbseq);
            (void) printf("bad length at seq # %d\n", nbseq);
            continue;
        }

        /* the sequence must be longer than the codons to be dropped */
        minlen = 0;
        if (sa_flag) minlen += 3;
        if (so_flag) minlen += 3;

        if (seq->length <= minlen) {
            (void) printf("bad length at seq # %d\n", nbseq);
            continue;
        }

        (void) bio_seq_translate(seq->seq, code);

        /* NOTE(review): assumes seq->comment has room for the      */
        /* appended suffix - verify against libfasta                */
        (void) strcat(seq->comment, " (translation)");

        seq->length /= 3;       /* one symbol per codon */

        if (sa_flag) {
            /* shift left by one symbol to drop the first one */
            sStrCpy(seq->seq, seq->seq + 1);
            seq->length--;
        }

        /* the last symbol is dropped by shortening the length only; */
        /* presumably WriteFastaSequence honors seq->length - TODO   */
        /* confirm                                                   */
        if (so_flag)
            seq->length--;

        WriteFastaSequence(stdout, seq, FASTA_CHAR_PER_LINE);
    }

    FreeFastaSequence(seq);

    return 0;
}