1776 lines
33 KiB
HTML
1776 lines
33 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="en-us" dir="ltr">
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="description" content="The FASTA sequence file format # The FASTA sequence file format is the most widely used sequence file format. This is probably due to its simplicity. It was originally created for the Lipman and Pearson FASTA program ( Citation: Pearson &amp; Lipman, 1988 Pearson, W. &amp; Lipman, D. (1988). Improved tools for biological sequence comparison. Proceedings of the National Academy of Sciences of the United States of America, 85(8). 2444–2448. Retrieved from http://www.ncbi.nlm.nih.gov/pubmed/3162770 ) .">
|
||
<meta name="theme-color" media="(prefers-color-scheme: light)" content="#ffffff">
|
||
<meta name="theme-color" media="(prefers-color-scheme: dark)" content="#343a40">
|
||
<meta name="color-scheme" content="light dark"><meta property="og:url" content="http://metabar:8888/obidoc/formats/fasta/">
|
||
<meta property="og:site_name" content="OBITools4 documentation">
|
||
<meta property="og:title" content="FASTA file format">
|
||
<meta property="og:description" content="The FASTA sequence file format # The FASTA sequence file format is the most widely used sequence file format. This is probably due to its simplicity. It was originally created for the Lipman and Pearson FASTA program ( Citation: Pearson & Lipman, 1988 Pearson, W. & Lipman, D. (1988). Improved tools for biological sequence comparison. Proceedings of the National Academy of Sciences of the United States of America, 85(8). 2444–2448. Retrieved from http://www.ncbi.nlm.nih.gov/pubmed/3162770 ) .">
|
||
<meta property="og:locale" content="en_us">
|
||
<meta property="og:type" content="website">
|
||
<title>FASTA file format | OBITools4 documentation</title>
|
||
<link rel="icon" href="/obidoc/favicon.png" >
|
||
<link rel="manifest" href="/obidoc/manifest.json">
|
||
<link rel="canonical" href="http://metabar:8888/obidoc/formats/fasta/">
|
||
<link rel="stylesheet" href="/obidoc/book.min.5fd7b8e2d1c0ae15da279c52ff32731130386f71b58f011468f20d0056fe6b78.css" integrity="sha256-X9e44tHArhXaJ5xS/zJzETA4b3G1jwEUaPINAFb+a3g=" crossorigin="anonymous">
|
||
<script defer src="/obidoc/fuse.min.js"></script>
|
||
<script defer src="/obidoc/en.search.min.4da51bdd2d833922fdbc0e19df517221387fc625ffb68ee140d605b3c5b68058.js" integrity="sha256-TaUb3S2DOSL9vA4Z31FyITh/xiX/to7hQNYFs8W2gFg=" crossorigin="anonymous"></script>
|
||
|
||
<script defer src="/obidoc/sw.min.32af8eafce4180aa1c5dea66d99fb26ba9043ea7c7a4c706138c91d9051b285e.js" integrity="sha256-Mq+Or85BgKocXepm2Z+ya6kEPqfHpMcGE4yR2QUbKF4=" crossorigin="anonymous"></script>
|
||
<link rel="alternate" type="application/rss+xml" href="http://metabar:8888/obidoc/formats/fasta/index.xml" title="OBITools4 documentation" />
|
||
<!--
|
||
Made with Book Theme
|
||
https://github.com/alex-shpak/hugo-book
|
||
-->
|
||
<link rel="stylesheet" type="text/css" href="http://metabar:8888/obidoc/hugo-cite.css" />
|
||
</head>
|
||
<body dir="ltr">
|
||
<input type="checkbox" class="hidden toggle" id="menu-control" />
|
||
<input type="checkbox" class="hidden toggle" id="toc-control" />
|
||
<main class="container flex">
|
||
<aside class="book-menu">
|
||
<div class="book-menu-content">
|
||
|
||
<nav>
|
||
<h2 class="book-brand">
|
||
<a class="flex align-center" href="/obidoc/"><img src="/obidoc/obitools_logo.jpg" alt="Logo" class="book-icon" /><span>OBITools4 documentation</span>
|
||
</a>
|
||
</h2>
|
||
|
||
|
||
<div class="book-search hidden">
|
||
<input type="text" id="book-search-input" placeholder="Search" aria-label="Search" maxlength="64" data-hotkeys="s/" />
|
||
<div class="book-search-spinner hidden"></div>
|
||
<ul id="book-search-results"></ul>
|
||
</div>
|
||
<script>document.querySelector(".book-search").classList.remove("hidden")</script>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<span>Docs</span>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/about/" class="">About</a>
|
||
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/installation/" class="">Installation</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/principles/" class="">General operating principles</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-08756b4c1f14be6ee584ece005b9f621" class="toggle" checked />
|
||
<label for="section-08756b4c1f14be6ee584ece005b9f621" class="flex justify-between">
|
||
<a role="button" class="">File formats</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-933c2e64b905b84e22aa5273cea2d0bd" class="toggle" checked />
|
||
<label for="section-933c2e64b905b84e22aa5273cea2d0bd" class="flex justify-between">
|
||
<a role="button" class="">Sequence file formats</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/formats/fasta/" class="active">FASTA file format</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/formats/fastq/" class="">FASTQ file format</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/formats/genbank/" class="">GenBank Flat File format</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/formats/embl/" class="">EMBL Flat File format</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/file_format/sequence_files/csv/" class="">CSV format</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/formats/json/" class="">JSON format</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/file_format/sequence_files/annotations/" class="">Annotation of sequences</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-0258ae1c222f9a38cc1b75254c93b0f4" class="toggle" />
|
||
<label for="section-0258ae1c222f9a38cc1b75254c93b0f4" class="flex justify-between">
|
||
<a role="button" class="">Taxonomy file formats</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/file_format/taxonomy_file/csv_taxdump/" class="">CSV formatted taxdump</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/file_format/taxonomy_file/ncbi_taxdump/" class="">NCBI taxdump</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/formats/csv/" class="">The CSV format</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-70b1e6e5ec7f3ccab643155fa50659b6" class="toggle" />
|
||
<label for="section-70b1e6e5ec7f3ccab643155fa50659b6" class="flex justify-between">
|
||
<a role="button" class="">Patterns</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/patterns/regular/" class="">Regular Expressions</a>
|
||
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/patterns/dnagrep/" class="">DNA Patterns</a>
|
||
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-8223f464911a1fe6c655972143684e93" class="toggle" />
|
||
<label for="section-8223f464911a1fe6c655972143684e93" class="flex justify-between">
|
||
<a role="button" class="">The OBITools4 commands</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/commands/options/" class="">Shared command options</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-8921ea65523c266b128dd4263232b0fc" class="toggle" />
|
||
<label for="section-8921ea65523c266b128dd4263232b0fc" class="flex justify-between">
|
||
<a role="button" class="">Basics</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obiannotate/" class="">obiannotate</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obicomplement/" class="">obicomplement</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obiconvert/" class="">obiconvert</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obicount/" class="">obicount</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obicsv/" class="">obicsv</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obidemerge/" class="">obidemerge</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obidistribute/" class="">obidistribute</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obigrep/" class="">obigrep</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obijoin/" class="">obijoin</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obimatrix/" class="">obimatrix</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obisplit/" class="">obisplit</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obisummary/" class="">obisummary</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obiuniq/" class="">obiuniq</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-dbdf1bb5377572439394e60e08c30f50" class="toggle" />
|
||
<label for="section-dbdf1bb5377572439394e60e08c30f50" class="flex justify-between">
|
||
<a role="button" class="">Demultiplexing samples</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obimultiplex/" class="">obimultiplex</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obitagpcr/" class="">obitagpcr</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-aa98fedd067b51150db59691a8ea8edd" class="toggle" />
|
||
<label for="section-aa98fedd067b51150db59691a8ea8edd" class="flex justify-between">
|
||
<a role="button" class="">Sequence alignments</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obiclean/" class="">obiclean</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-7433746525d8c2b29b033f765c869acd" class="toggle" />
|
||
<label for="section-7433746525d8c2b29b033f765c869acd" class="flex justify-between">
|
||
<a href="/obidoc/obitools/obipairing/" class="">obipairing</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/commands/alignments/obipairing/fasta-like/" class="">The FASTA-like alignment</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/commands/alignments/obipairing/exact-alignment/" class="">Exact alignment</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obipcr/" class="">obipcr</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obirefidx/" class="">obirefidx</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obitag/" class="">obitag</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-5746f699d10490780dec8e30ab2dd3ce" class="toggle" />
|
||
<label for="section-5746f699d10490780dec8e30ab2dd3ce" class="flex justify-between">
|
||
<a role="button" class="">Taxonomy</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obitaxonomy/" class="">obitaxonomy</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-3f50c4fe7ab436a56ae92897d5444956" class="toggle" />
|
||
<label for="section-3f50c4fe7ab436a56ae92897d5444956" class="flex justify-between">
|
||
<a role="button" class="">Advanced tools</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obiscript/" class="">obiscript</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-549be3934679fcb82a232f6bd5435563" class="toggle" />
|
||
<label for="section-549be3934679fcb82a232f6bd5435563" class="flex justify-between">
|
||
<a role="button" class="">Others</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obimicrosat/" class="">obimicrosat</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-ceca4455173761e30cbc0a6dc2327167" class="toggle" />
|
||
<label for="section-ceca4455173761e30cbc0a6dc2327167" class="flex justify-between">
|
||
<a role="button" class="">Experimentals</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obicleandb/" class="">obicleandb</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obiconsensus/" class="">obiconsensus</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/obitools/obilandmark/" class="">obilandmark</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/commands/tags/" class="">Glossary of tags</a>
|
||
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-9b1bcd52530c59dc4819b1f61c128f54" class="toggle" />
|
||
<label for="section-9b1bcd52530c59dc4819b1f61c128f54" class="flex justify-between">
|
||
<a role="button" class="">Cookbook</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/cookbook/illumina/" class="">Analysing an Illumina data set</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/cookbook/ecoprimers/" class="">Designing new barcodes</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/cookbook/local_genbank/" class="">Prepare a local copy of Genbank</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/cookbook/reference_db/" class="">Build a reference database</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/cookbook/minion/" class="">Oxford Nanopore data analysis</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<span>Programming OBITools</span>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/programming/expression/" class="">Expression language</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-6d580829a667b5cca790b286d99a10fe" class="toggle" />
|
||
<label for="section-6d580829a667b5cca790b286d99a10fe" class="flex justify-between">
|
||
<a href="/obidoc/docs/programming/lua/" class="">Lua: for scripting OBITools</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<input type="checkbox" id="section-2fb081dac812d624eea5f4268fca9e26" class="toggle" />
|
||
<label for="section-2fb081dac812d624eea5f4268fca9e26" class="flex justify-between">
|
||
<a role="button" class="">Obitools Classes</a>
|
||
</label>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/programming/lua/obitools_classes/biosequence/" class="">BioSequence</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/programming/lua/obitools_classes/biosequenceslice/" class="">BioSequenceSlice</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/programming/lua/obitools_classes/taxonomy/" class="">Taxonomy</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/programming/lua/obitools_classes/taxon/" class="">Taxon</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/obidoc/docs/programming/lua/obitools_classes/mutex/" class="">Mutex</a>
|
||
|
||
|
||
|
||
<ul>
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
</li>
|
||
|
||
|
||
</ul>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</nav>
|
||
|
||
|
||
|
||
|
||
<script>(function(){var e=document.querySelector("aside .book-menu-content");addEventListener("beforeunload",function(){localStorage.setItem("menu.scrollTop",e.scrollTop)}),e.scrollTop=localStorage.getItem("menu.scrollTop")})()</script>
|
||
|
||
|
||
|
||
</div>
|
||
</aside>
|
||
|
||
<div class="book-page">
|
||
<header class="book-header">
|
||
|
||
<div class="flex align-center justify-between">
|
||
<label for="menu-control">
|
||
<img src="/obidoc/svg/menu.svg" class="book-icon" alt="Menu" />
|
||
</label>
|
||
|
||
<h3>FASTA file format</h3>
|
||
|
||
<label for="toc-control">
|
||
|
||
<img src="/obidoc/svg/toc.svg" class="book-icon" alt="Table of Contents" />
|
||
|
||
</label>
|
||
</div>
|
||
|
||
|
||
|
||
<aside class="hidden clearfix">
|
||
|
||
|
||
<nav id="TableOfContents">
|
||
<ul>
|
||
<li><a href="#the-fasta-sequence-file-format">The <em>FASTA</em> sequence file format</a>
|
||
<ul>
|
||
<li><a href="#references">References</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</nav>
|
||
|
||
|
||
|
||
</aside>
|
||
|
||
|
||
</header>
|
||
|
||
|
||
|
||
<article class="markdown book-article"><h1 id="the-fasta-sequence-file-format">
|
||
The <em>FASTA</em> sequence file format
|
||
<a class="anchor" href="#the-fasta-sequence-file-format">#</a>
|
||
</h1>
|
||
<p>The <em>FASTA</em> sequence file format is the most widely used sequence file format. This is probably due to its simplicity. It was originally created for the Lipman and Pearson
|
||
<a href="https://en.wikipedia.org/wiki/FASTA">FASTA program</a>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<span class="hugo-cite-intext"
|
||
itemprop="citation">(<span class="hugo-cite-group">
|
||
|
||
<a href="#pearson1988aa"><span class="visually-hidden">Citation: </span><span itemprop="author" itemscope itemtype="https://schema.org/Person"><meta itemprop="givenName" content="W R"><span itemprop="familyName">Pearson</span></span> & <span itemprop="author" itemscope itemtype="https://schema.org/Person"><meta itemprop="givenName" content="D J"><span itemprop="familyName">Lipman</span></span>, <span itemprop="datePublished">1988</span></a><span class="hugo-cite-citation">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<span itemscope
|
||
itemtype="https://schema.org/Article"
|
||
data-type="article"><span itemprop="author" itemscope itemtype="https://schema.org/Person"><span itemprop="familyName">Pearson</span>, 
|
||
<meta itemprop="givenName" content="W R" />
|
||
W.</span> & <span itemprop="author" itemscope itemtype="https://schema.org/Person"><span itemprop="familyName">Lipman</span>, 
|
||
<meta itemprop="givenName" content="D J" />
|
||
D.</span>
|
||
 
|
||
(<span itemprop="datePublished">1988</span>).
|
||
 <span itemprop="name">Improved tools for biological sequence comparison</span>.<i>
|
||
<span itemprop="about">Proceedings of the National Academy of Sciences of the United States of America</span>, 85(8)</i>. <span itemprop="pagination">2444–2448</span>. Retrieved from 
|
||
<a href="http://www.ncbi.nlm.nih.gov/pubmed/3162770"
|
||
itemprop="identifier"
|
||
itemtype="https://schema.org/URL">http://www.ncbi.nlm.nih.gov/pubmed/3162770</a></span>
|
||
|
||
|
||
|
||
|
||
</span></span>)</span>
|
||
.</p>
|
||
<p>In the <em>FASTA</em> format, each sequence is represented by a title line starting with a <strong>&gt;</strong> character, followed by the sequence itself, written using the
|
||
<a href="http://metabar:8888/obidoc/docs/patterns/dnagrep/#iupac-codes-for-ambiguous-bases"><code>iupac</code></a> code. The sequence is usually split across several lines of the same length (except for the last one). Several sequences can be stored in the same file. The title line of the next sequence also marks the end of the previous one.</p>
|
||
<pre tabindex="0"><code>>my_sequence this is my pretty sequence
|
||
ACGTTGCAGTACGTTGCAGTACGTTGCAGTACGTTGCAGTACGTTGCAGTACGTTGCAGT
|
||
GTGCTGACGTTGCAGTACGTTGCAGTACGTTGCAGTACGTTGCAGTACGTTGCAGTGTTT
|
||
AACGACGTTGCAGTACGTTGCAGT
|
||
</code></pre><p>The first word in the title line is the sequence identifier. The rest of the line is a description of the sequence. The <em>OBITools</em> extend this format by adding structured data to the title line. In the previous version of the <em>OBITools</em>, the structured data was stored after the sequence identifier in a <code>key=value;</code> format, as shown below. The sequence definition was stored as free text after the last <code>key=value;</code> pair.</p>
|
||
|
||
<a style="padding: 10px 20px; background-color: #cacaca; border: 1px solid #8e8080; border-bottom: none; border-radius: 5px 5px 0 0; box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1)"
|
||
href="two_sequences_obi2.fasta" download="two_sequences_obi2.fasta">📄 two_sequences_obi2.fasta</a>
|
||
<DIV style="border: 2px solid #8e8080; border-radius: 0 0 5px 5px; padding: 20px; background-color: white; ">
|
||
|
||
<pre tabindex="0"><code class="language-fasta" data-lang="fasta">>AB061527 obicleandb_level=family; count=1; family_name=Soricidae; genus_name=Sorex; genus_taxid=9379; obicleandb_trusted=2.2137847111025621e-13; species_name=Sorex unguiculatus; species_taxid=62275; taxid=62275; family_taxid=9376; Sorex unguiculatus mitochondrial NA, complete genome.
|
||
ttagccctaaacttaggtatttaatctaacaaaaatacccgtcagagaactactagcaat
|
||
agcttaaaactcaaaggacttggcggtgctttatatccct
|
||
>AL355887 species_name=Homo sapiens; family_taxid=9604; genus_name=Homo; obicleandb_trusted=0; genus_taxid=9605; obicleandb_level=genus; species_taxid=9606; taxid=9606; count=2; family_name=Hominidae; Human chromosome 14 NA sequence BAC R-179O11 of library RPCI-11 from chromosome 14 of Homo sapiens (Human)XXKW HTG.; HTGS_ACTIVFIN.
|
||
ttagccctaaactctagtagttacattaacaaaaccattcgtcagaatactacgagcaac
|
||
agcttaaaactcaaaggacctggcagttctttatatccct
|
||
</code></pre>
|
||
|
||
</DIV>
|
||
|
||
<p>With <em>OBITools4</em> a new format has been introduced to store structured data in the title line. The <em>key</em>/<em>value</em> annotation pairs are now formatted as a
|
||
<a href="https://en.wikipedia.org/wiki/JSON">JSON</a> map object. The definition is stored as an additional <em>key</em>/<em>value</em> pair using the <em>key</em> ‘definition’.</p>
|
||
|
||
<a style="padding: 10px 20px; background-color: #cacaca; border: 1px solid #8e8080; border-bottom: none; border-radius: 5px 5px 0 0; box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1)"
|
||
href="two_sequences_obi4.fasta" download="two_sequences_obi4.fasta">📄 two_sequences_obi4.fasta</a>
|
||
<DIV style="border: 2px solid #8e8080; border-radius: 0 0 5px 5px; padding: 20px; background-color: white; ">
|
||
|
||
<pre tabindex="0"><code class="language-fasta" data-lang="fasta">>AB061527 {"count":1,"definition":"Sorex unguiculatus mitochondrial NA, complete genome.","family_name":"Soricidae","family_taxid":9376,"genus_name":"Sorex","genus_taxid":9379,"obicleandb_level":"family","obicleandb_trusted":2.2137847111025621e-13,"species_name":"Sorex unguiculatus","species_taxid":62275,"taxid":62275}
|
||
ttagccctaaacttaggtatttaatctaacaaaaatacccgtcagagaactactagcaat
|
||
agcttaaaactcaaaggacttggcggtgctttatatccct
|
||
>AL355887 {"count":2,"definition":"Human chromosome 14 NA sequence BAC R-179O11 of library RPCI-11 from chromosome 14 of Homo sapiens (Human)XXKW HTG.; HTGS_ACTIVFIN.","family_name":"Hominidae","family_taxid":9604,"genus_name":"Homo","genus_taxid":9605,"obicleandb_level":"genus","obicleandb_trusted":0,"species_name":"Homo sapiens","species_taxid":9606,"taxid":9606}
|
||
ttagccctaaactctagtagttacattaacaaaaccattcgtcagaatactacgagcaac
|
||
agcttaaaactcaaaggacctggcagttctttatatccct
|
||
</code></pre>
|
||
|
||
</DIV>
|
||
|
||
<p>The <a href="http://metabar:8888/obidoc/obitools/obiconvert/">
|
||
<abbr title="obiconvert: convert format of a sequence file"><code>obiconvert</code></abbr>
|
||
</a> command, like all other <em>OBITools4</em> commands, has two options <code>--output-json-header</code> and <code>--output-OBI-header</code> to choose between the new
|
||
<a href="https://en.wikipedia.org/wiki/JSON">JSON</a> format and the old <em>OBITools</em> format. The <code>--output-OBI-header</code> option can be abbreviated to <code>-O</code>. By default, the new
|
||
<a href="https://en.wikipedia.org/wiki/JSON">JSON</a> <em>OBITools4</em> format is used, so only the <code>-O</code> option is really useful if the old format is required for compatibility with other software.</p>
|
||
<p>Converting from the new
|
||
<a href="https://en.wikipedia.org/wiki/JSON">JSON</a> format to the old <em>OBITools</em> format:</p>
|
||
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-bash" data-lang="bash"><span style="display:flex;"><span>obiconvert -O two_sequences_obi4.fasta
|
||
</span></span></code></pre></div>
|
||
<pre tabindex="0"><code class="language-fasta" data-lang="fasta">>AB061527 obicleandb_level=family; count=1; family_name=Soricidae; genus_name=Sorex; genus_taxid=9379; obicleandb_trusted=2.2137847111025621e-13; species_name=Sorex unguiculatus; species_taxid=62275; taxid=62275; family_taxid=9376; Sorex unguiculatus mitochondrial NA, complete genome.
|
||
ttagccctaaacttaggtatttaatctaacaaaaatacccgtcagagaactactagcaat
|
||
agcttaaaactcaaaggacttggcggtgctttatatccct
|
||
>AL355887 species_name=Homo sapiens; family_taxid=9604; genus_name=Homo; obicleandb_trusted=0; genus_taxid=9605; obicleandb_level=genus; species_taxid=9606; taxid=9606; count=2; family_name=Hominidae; Human chromosome 14 NA sequence BAC R-179O11 of library RPCI-11 from chromosome 14 of Homo sapiens (Human)XXKW HTG.; HTGS_ACTIVFIN.
|
||
ttagccctaaactctagtagttacattaacaaaaccattcgtcagaatactacgagcaac
|
||
agcttaaaactcaaaggacctggcagttctttatatccct
|
||
</code></pre>
|
||
|
||
<p>Converting from the old <em>OBITools</em> format to the new
|
||
<a href="https://en.wikipedia.org/wiki/JSON">JSON</a> format:</p>
|
||
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-bash" data-lang="bash"><span style="display:flex;"><span>obiconvert two_sequences_obi2.fasta
|
||
</span></span></code></pre></div>
|
||
<pre tabindex="0"><code class="language-fasta" data-lang="fasta">>AB061527 {"count":1,"definition":"Sorex unguiculatus mitochondrial NA, complete genome.","family_name":"Soricidae","family_taxid":9376,"genus_name":"Sorex","genus_taxid":9379,"obicleandb_level":"family","obicleandb_trusted":2.2137847111025621e-13,"species_name":"Sorex unguiculatus","species_taxid":62275,"taxid":62275}
|
||
ttagccctaaacttaggtatttaatctaacaaaaatacccgtcagagaactactagcaat
|
||
agcttaaaactcaaaggacttggcggtgctttatatccct
|
||
>AL355887 {"count":2,"definition":"Human chromosome 14 NA sequence BAC R-179O11 of library RPCI-11 from chromosome 14 of Homo sapiens (Human)XXKW HTG.; HTGS_ACTIVFIN.","family_name":"Hominidae","family_taxid":9604,"genus_name":"Homo","genus_taxid":9605,"obicleandb_level":"genus","obicleandb_trusted":0,"species_name":"Homo sapiens","species_taxid":9606,"taxid":9606}
|
||
ttagccctaaactctagtagttacattaacaaaaccattcgtcagaatactacgagcaac
|
||
agcttaaaactcaaaggacctggcagttctttatatccct
|
||
</code></pre>
|
||
|
||
<p>The actual format of the header is automatically detected when <em>OBITools4</em> commands read a FASTA file.</p>
|
||
<h2 id="references">
|
||
References
|
||
<a class="anchor" href="#references">#</a>
|
||
</h2>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<section class="hugo-cite-bibliography">
|
||
<dl>
|
||
|
||
|
||
<div id="pearson1988aa">
|
||
<dt>
|
||
Pearson & Lipman
|
||
|
||
|
||
(1988)</dt>
|
||
|
||
<dd>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<span itemscope
|
||
itemtype="https://schema.org/Article"
|
||
data-type="article"><span itemprop="author" itemscope itemtype="https://schema.org/Person"><span itemprop="familyName">Pearson</span>, 
|
||
<meta itemprop="givenName" content="W R" />
|
||
W.</span> & <span itemprop="author" itemscope itemtype="https://schema.org/Person"><span itemprop="familyName">Lipman</span>, 
|
||
<meta itemprop="givenName" content="D J" />
|
||
D.</span>
|
||
 
|
||
(<span itemprop="datePublished">1988</span>).
|
||
 <span itemprop="name">Improved tools for biological sequence comparison</span>.<i>
|
||
<span itemprop="about">Proceedings of the National Academy of Sciences of the United States of America</span>, 85(8)</i>. <span itemprop="pagination">2444–2448</span>. Retrieved from 
|
||
<a href="http://www.ncbi.nlm.nih.gov/pubmed/3162770"
|
||
itemprop="identifier"
|
||
itemtype="https://schema.org/URL">http://www.ncbi.nlm.nih.gov/pubmed/3162770</a></span>
|
||
|
||
|
||
|
||
|
||
</dd>
|
||
|
||
</div>
|
||
</dl>
|
||
</section>
|
||
|
||
|
||
|
||
</article>
|
||
|
||
|
||
|
||
<footer class="book-footer">
|
||
|
||
<div class="flex flex-wrap justify-between">
|
||
|
||
|
||
|
||
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<script>(function(){function e(e){const t=window.getSelection(),n=document.createRange();n.selectNodeContents(e),t.removeAllRanges(),t.addRange(n)}document.querySelectorAll("pre code").forEach(t=>{t.addEventListener("click",function(){if(window.getSelection().toString())return;e(t.parentElement),navigator.clipboard&&navigator.clipboard.writeText(t.parentElement.textContent)})})})()</script>
|
||
|
||
|
||
|
||
|
||
</footer>
|
||
|
||
|
||
|
||
<div class="book-comments">
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<label for="menu-control" class="hidden book-menu-overlay"></label>
|
||
</div>
|
||
|
||
|
||
<aside class="book-toc">
|
||
<div class="book-toc-content">
|
||
|
||
|
||
<nav id="TableOfContents">
|
||
<ul>
|
||
<li><a href="#the-fasta-sequence-file-format">The <em>FASTA</em> sequence file format</a>
|
||
<ul>
|
||
<li><a href="#references">References</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</nav>
|
||
|
||
|
||
|
||
</div>
|
||
</aside>
|
||
|
||
</main>
|
||
|
||
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|