Resource List
List all resources, or create a new resource.
GET /api/t/?operationID=%22operation_2403%22
https://github.com/lolrenceH/TrieDedup", "biotoolsID": "triededup", "biotoolsCURIE": "biotools:triededup", "version": [], "otherID": [], "relation": [], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_3198", "term": "Read mapping" }, { "uri": "http://edamontology.org/operation_2409", "term": "Data handling" }, { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" } ], "input": [], "output": [], "note": null, "cmd": null } ], "toolType": [ "Command-line tool", "Script" ], "topic": [ { "uri": "http://edamontology.org/topic_3168", "term": "Sequencing" }, { "uri": "http://edamontology.org/topic_0102", "term": "Mapping" }, { "uri": "http://edamontology.org/topic_3519", "term": "PCR experiment" } ], "operatingSystem": [], "language": [ "Python" ], "license": "Apache-2.0", "collectionID": [], "maturity": null, "cost": "Free of charge", "accessibility": "Open access", "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [], "download": [], "documentation": [], "publication": [ { "doi": "10.1186/s12859-024-05775-w", "pmid": "38637756", "pmcid": "PMC11025179", "type": [], "version": null, "note": null, "metadata": { "title": "TrieDedup: a fast trie-based deduplication algorithm to handle ambiguous bases in high-throughput sequencing", "abstract": "Background: High-throughput sequencing is a powerful tool that is extensively applied in biological studies. However, sequencers may produce low-quality bases, leading to ambiguous bases, ‘N’s. PCR duplicates introduced in library preparation are conventionally removed in genomics studies, and several deduplication tools have been developed for this purpose. Two identical reads may appear different due to ambiguous bases and the existing tools cannot address ‘N’s correctly or efficiently. Results: Here we proposed and implemented TrieDedup, which uses the trie (prefix tree) data structure to compare and store sequences. TrieDedup can handle ambiguous base ‘N’s, and efficiently deduplicate at the level of raw sequences. We also reduced its memory usage by approximately 20% by implementing restrictedDict in Python. We benchmarked the performance of the algorithm and showed that TrieDedup can deduplicate reads up to 270-fold faster than pairwise comparison at a cost of 32-fold higher memory usage. Conclusions: The TrieDedup algorithm may facilitate PCR deduplication, barcode or UMI assignment, and repertoire diversity analysis of large-scale high-throughput sequencing datasets with its ultra-fast algorithm that can account for ambiguous bases due to sequencing errors.", "date": "2024-12-01T00:00:00Z", "citationCount": 0, "authors": [ { "name": "Hu J." }, { "name": "Luo S." }, { "name": "Tian M." }, { "name": "Ye A.Y." } ], "journal": "BMC Bioinformatics" } } ], "credit": [ { "name": "Adam Yongxin Ye", "email": "yeyx2626@gmail.com", "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Person", "typeRole": [], "note": null }, { "name": "Jianqiao Hu", "email": null, "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Person", "typeRole": [], "note": null } ], "community": null, "owner": "Pub2Tools", "additionDate": "2024-06-18T12:44:34.691271Z", "lastUpdate": "2024-06-18T12:44:34.694901Z", "editPermission": { "type": "private", "authors": [] }, "validated": 0, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": "tool" }, { "name": "Infernal cmscan (EBI)", "description": "Infernal (\"INFERence of RNA ALignment\") is for searching DNA sequence databases (e.g. Rfam) for RNA structure and sequence similarities.", "homepage": "https://www.ebi.ac.uk/jdispatcher/rna/infernal_cmscan", "biotoolsID": "infernal_cmscan", "biotoolsCURIE": "biotools:infernal_cmscan", "version": [ "1" ], "otherID": [], "relation": [ { "biotoolsID": "infernal", "type": "uses" } ], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" } ], "input": [], "output": [], "note": null, "cmd": null } ], "toolType": [ "Web application", "Web service" ], "topic": [ { "uri": "http://edamontology.org/topic_0080", "term": "Sequence analysis" } ], "operatingSystem": [ "Linux", "Windows", "Mac" ], "language": [], "license": null, "collectionID": [ "EBI Tools", "Job Dispatcher Tools" ], "maturity": null, "cost": null, "accessibility": null, "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [ { "url": "https://www.ebi.ac.uk/about/contact/support/job-dispatcher-services", "type": [ "Helpdesk" ], "note": null } ], "download": [ { "url": "http://eddylab.org/infernal/", "type": "Downloads page", "note": null, "version": null } ], "documentation": [ { "url": "http://www.ebi.ac.uk/about/terms-of-use", "type": [ "Terms of use" ], "note": null }, { "url": "https://www.ebi.ac.uk/jdispatcher/help", "type": [ "General" ], "note": null }, { "url": "http://eddylab.org/infernal/", "type": [ "General" ], "note": null } ], "publication": [ { "doi": "10.1093/bioinformatics/btt509", "pmid": null, "pmcid": null, "type": [ "Primary" ], "version": null, "note": null, "metadata": { "title": "Infernal 1.1: 100-fold faster RNA homology searches", "abstract": "Summary: Infernal builds probabilistic profiles of the sequence and secondary structure of an RNA family called covariance models (CMs) from structurally annotated multiple sequence alignments given as input. Infernal uses CMs to search for new family members in sequence databases and to create potentially large multiple sequence alignments. Version 1.1 of Infernal introduces a new filter pipeline for RNA homology search based on accelerated profile hidden Markov model (HMM) methods and HMM-banded CM alignment methods. This enables ∼100-fold acceleration over the previous version and ∼10 000-fold acceleration over exhaustive non-filtered CM searches. © The Author 2013. Published by Oxford University Press. All rights reserved.", "date": "2013-11-15T00:00:00Z", "citationCount": 1788, "authors": [ { "name": "Nawrocki E.P." }, { "name": "Eddy S.R." } ], "journal": "Bioinformatics" } }, { "doi": "10.1093/nar/gkae241", "pmid": "38597606", "pmcid": null, "type": [ "Other" ], "version": null, "note": null, "metadata": null }, { "doi": "10.1093/nar/gkac240", "pmid": null, "pmcid": null, "type": [ "Other" ], "version": null, "note": null, "metadata": { "title": "Search and sequence analysis tools services from EMBL-EBI in 2022", "abstract": "The EMBL-EBI search and sequence analysis tools frameworks provide integrated access to EMBL-EBI's data resources and core bioinformatics analytical tools. EBI Search (https://www.ebi.ac.uk/ebisearch) provides a full-text search engine across nearly 5 billion entries, while the Job Dispatcher tools framework (https://www.ebi.ac.uk/services) enables the scientific community to perform a diverse range of sequence analysis using popular bioinformatics applications. Both allow users to interact through user-friendly web applications, as well as via RESTful and SOAP-based APIs. Here, we describe recent improvements to these services and updates made to accommodate the increasing data requirements during the COVID-19 pandemic.", "date": "2022-07-05T00:00:00Z", "citationCount": 867, "authors": [ { "name": "Madeira F." }, { "name": "Pearce M." }, { "name": "Tivey A.R.N." }, { "name": "Basutkar P." }, { "name": "Lee J." }, { "name": "Edbali O." }, { "name": "Madhusoodanan N." }, { "name": "Kolesnikov A." }, { "name": "Lopez R." } ], "journal": "Nucleic Acids Research" } } ], "credit": [ { "name": "Sean Eddy", "email": null, "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Person", "typeRole": [ "Developer" ], "note": null }, { "name": "EMBL-EBI", "email": null, "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Institute", "typeRole": [ "Provider" ], "note": null }, { "name": "Job Dispatcher", "email": null, "url": "http://www.ebi.ac.uk/jdispatcher", "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Project", "typeRole": [ "Primary contact" ], "note": null } ], "community": null, "owner": "jdispatcher", "additionDate": "2015-08-03T09:35:17Z", "lastUpdate": "2024-05-16T13:09:51.619759Z", "editPermission": { "type": "private", "authors": [] }, "validated": 1, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null }, { "name": "T-Coffee (EBI)", "description": "Multiple sequence alignment that combines several alignment methods.", "homepage": "https://www.ebi.ac.uk/jdispatcher/msa/tcoffee/", "biotoolsID": "TCoffee_EBI", "biotoolsCURIE": "biotools:TCoffee_EBI", "version": [ "1" ], "otherID": [], "relation": [ { "biotoolsID": "tcoffee", "type": "uses" } ], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" }, { "uri": "http://edamontology.org/operation_0492", "term": "Multiple sequence alignment" } ], "input": [ { "data": { "uri": "http://edamontology.org/data_2044", "term": "Sequence" }, "format": [] } ], "output": [ { "data": { "uri": "http://edamontology.org/data_0863", "term": "Sequence alignment" }, "format": [] } ], "note": null, "cmd": null } ], "toolType": [ "Web application", "Web service" ], "topic": [ { "uri": "http://edamontology.org/topic_0080", "term": "Sequence analysis" } ], "operatingSystem": [ "Linux", "Windows", "Mac" ], "language": [], "license": null, "collectionID": [ "T-Coffee", "EBI Tools", "Job Dispatcher Tools" ], "maturity": "Mature", "cost": "Free of charge", "accessibility": "Open access", "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [ { "url": "https://www.ebi.ac.uk/about/contact/support/job-dispatcher-services", "type": [ "Helpdesk" ], "note": null } ], "download": [ { "url": "https://tcoffee.org/Projects/tcoffee/index.html", "type": "Downloads page", "note": null, "version": null } ], "documentation": [ { "url": "http://www.ebi.ac.uk/about/terms-of-use", "type": [ "Terms of use" ], "note": null }, { "url": "https://www.ebi.ac.uk/jdispatcher/help", "type": [ "General" ], "note": null }, { "url": "https://tcoffee.org/Projects/tcoffee/index.html", "type": [ "General" ], "note": null } ], "publication": [ { "doi": "10.1006/jmbi.2000.4042", "pmid": null, "pmcid": null, "type": [ "Primary" ], "version": null, "note": null, "metadata": { "title": "T-coffee: A novel method for fast and accurate multiple sequence alignment", "abstract": "We describe a new method (T-Coffee) for multiple sequence alignment that provides a dramatic improvement in accuracy with a modest sacrifice in speed as compared to the most commonly used alternatives. The method is broadly based on the popular progressive approach to multiple alignment but avoids the most serious pitfalls caused by the greedy nature of this algorithm. With T-Coffee we pre-process a data set of all pair-wise alignments between the sequences. This provides us with a library of alignment information that can be used to guide the progressive alignment. Intermediate alignments are then based not only on the sequences to be aligned next but also on how all of the sequences align with each other. This alignment information can be derived from heterogeneous sources such as a mixture of alignment programs and/or structure superposition. Here, we illustrate the power of the approach by using a combination of local and global pair-wise alignments to generate the library. The resulting alignments are significantly more reliable, as determined by comparison with a set of 141 test cases, than any of the popular alternatives that we tried. The improvement, especially clear with the more difficult test cases, is always visible, regardless of the phylogenetic spread of the sequences in the tests. (C) 2000 Academic Press.", "date": "2000-09-08T00:00:00Z", "citationCount": 5714, "authors": [ { "name": "Notredame C." }, { "name": "Higgins D.G." }, { "name": "Heringa J." } ], "journal": "Journal of Molecular Biology" } }, { "doi": "10.1093/nar/gkae241", "pmid": "38597606", "pmcid": null, "type": [ "Other" ], "version": null, "note": null, "metadata": null }, { "doi": "10.1093/nar/gkac240", "pmid": null, "pmcid": null, "type": [ "Other" ], "version": null, "note": null, "metadata": { "title": "Search and sequence analysis tools services from EMBL-EBI in 2022", "abstract": "The EMBL-EBI search and sequence analysis tools frameworks provide integrated access to EMBL-EBI's data resources and core bioinformatics analytical tools. EBI Search (https://www.ebi.ac.uk/ebisearch) provides a full-text search engine across nearly 5 billion entries, while the Job Dispatcher tools framework (https://www.ebi.ac.uk/services) enables the scientific community to perform a diverse range of sequence analysis using popular bioinformatics applications. Both allow users to interact through user-friendly web applications, as well as via RESTful and SOAP-based APIs. Here, we describe recent improvements to these services and updates made to accommodate the increasing data requirements during the COVID-19 pandemic.", "date": "2022-07-05T00:00:00Z", "citationCount": 867, "authors": [ { "name": "Madeira F." }, { "name": "Pearce M." }, { "name": "Tivey A.R.N." }, { "name": "Basutkar P." }, { "name": "Lee J." }, { "name": "Edbali O." }, { "name": "Madhusoodanan N." }, { "name": "Kolesnikov A." }, { "name": "Lopez R." } ], "journal": "Nucleic Acids Research" } } ], "credit": [ { "name": "Cedric Notredame", "email": null, "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Person", "typeRole": [ "Developer" ], "note": null }, { "name": "EMBL-EBI", "email": null, "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Institute", "typeRole": [ "Provider" ], "note": null }, { "name": "CRG", "email": null, "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Institute", "typeRole": [ "Provider" ], "note": null }, { "name": "Job Dispatcher", "email": null, "url": "https://www.ebi.ac.uk/jdispatcher", "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Project", "typeRole": [ "Primary contact" ], "note": null } ], "community": null, "owner": "jdispatcher", "additionDate": "2015-01-29T15:47:33Z", "lastUpdate": "2024-05-16T11:40:53.572187Z", "editPermission": { "type": "group", "authors": [ "nandana", "biomadeira" ] }, "validated": 0, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null }, { "name": "COVIDep", "description": "COVIDep provides an up-to-date set of B-cell and T-cell epitopes that can serve as potential vaccine targets for SARS-CoV-2. The identified epitopes are experimentally-derived from the 2003 SARS virus and have a close genetic match with the available SARS-CoV-2 sequences. COVIDep is flexible and user-friendly, comprising an intuitive graphical interface and interactive visualizations.", "homepage": "https://covidep.ust.hk", "biotoolsID": "covidep", "biotoolsCURIE": "biotools:covidep", "version": [ "1.0" ], "otherID": [], "relation": [], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" }, { "uri": "http://edamontology.org/operation_0337", "term": "Visualisation" }, { "uri": "http://edamontology.org/operation_2423", "term": "Prediction and recognition" }, { "uri": "http://edamontology.org/operation_2479", "term": "Protein sequence analysis" }, { "uri": "http://edamontology.org/operation_0416", "term": "Epitope mapping" } ], "input": [], "output": [], "note": null, "cmd": null } ], "toolType": [ "Web application" ], "topic": [ { "uri": "http://edamontology.org/topic_0080", "term": "Sequence analysis" }, { "uri": "http://edamontology.org/topic_0092", "term": "Data visualisation" }, { "uri": "http://edamontology.org/topic_2269", "term": "Statistics and probability" }, { "uri": "http://edamontology.org/topic_3948", "term": "Immunoinformatics" }, { "uri": "http://edamontology.org/topic_3474", "term": "Machine learning" } ], "operatingSystem": [ "Linux", "Windows", "Mac" ], "language": [ "R" ], "license": "MIT", "collectionID": [ "COVID-19" ], "maturity": "Emerging", "cost": "Free of charge", "accessibility": "Open access", "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [ { "url": "https://github.com/COVIDep/COVIDep", "type": [ "Repository" ], "note": null } ], "download": [ { "url": "https://github.com/COVIDep/COVIDep", "type": "Source code", "note": null, "version": null } ], "documentation": [ { "url": "https://covidep.ust.hk", "type": [ "User manual" ], "note": "See \"How to use COVIDep\" page" } ], "publication": [ { "doi": "10.1038/s41596-020-0358-9", "pmid": "32555466", "pmcid": "PMC7299140", "type": [ "Primary" ], "version": null, "note": "Ahmed, S.F., Quadeer, A.A. & McKay, M.R. COVIDep: a web-based platform for real-time reporting of vaccine target recommendations for SARS-CoV-2. Nature Protocols (2020).", "metadata": { "title": "COVIDep: a web-based platform for real-time reporting of vaccine target recommendations for SARS-CoV-2", "abstract": "", "date": "2020-07-01T00:00:00Z", "citationCount": 16, "authors": [ { "name": "Ahmed S.F." }, { "name": "Quadeer A.A." }, { "name": "McKay M.R." } ], "journal": "Nature Protocols" } } ], "credit": [ { "name": null, "email": "covidep@ust.hk", "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Person", "typeRole": [ "Support" ], "note": null } ], "community": null, "owner": "Wting", "additionDate": "2020-06-18T04:24:54Z", "lastUpdate": "2024-03-20T14:04:03.289887Z", "editPermission": { "type": "group", "authors": [ "Wting", "DNN" ] }, "validated": 1, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null }, { "name": "compleasm", "description": "\"Compleasm: a faster and more accurate reimplementation of BUSCO\"", "homepage": "https://github.com/huangnengCSU/compleasm", "biotoolsID": "compleasm", "biotoolsCURIE": "biotools:compleasm", "version": [ "0.2.5" ], "otherID": [], "relation": [], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_3180", "term": "Sequence assembly validation" }, { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" }, { "uri": "http://edamontology.org/operation_3216", "term": "Scaffolding" }, { "uri": "http://edamontology.org/operation_3258", "term": "Transcriptome assembly" } ], "input": [], "output": [], "note": null, "cmd": null } ], "toolType": [ "Command-line tool" ], "topic": [ { "uri": "http://edamontology.org/topic_0196", "term": "Sequence assembly" }, { "uri": "http://edamontology.org/topic_0622", "term": "Genomics" }, { "uri": "http://edamontology.org/topic_3308", "term": "Transcriptomics" }, { "uri": "http://edamontology.org/topic_0080", "term": "Sequence analysis" } ], "operatingSystem": [], "language": [ "Python" ], "license": "Apache-2.0", "collectionID": [], "maturity": "Mature", "cost": "Free of charge", "accessibility": "Open access", "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [], "download": [], "documentation": [], "publication": [], "credit": [], "community": null, "owner": "rlibouban", "additionDate": "2024-03-18T14:51:49.667412Z", "lastUpdate": "2024-03-18T14:51:49.669973Z", "editPermission": { "type": "public", "authors": [] }, "validated": 0, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null }, { "name": "Proteinortho", "description": "Proteinortho is a tool to detect orthologous genes within different species", "homepage": "https://gitlab.com/paulklemm_PHD/proteinortho", "biotoolsID": "proteinortho", "biotoolsCURIE": "biotools:proteinortho", "version": [ "6.3.1" ], "otherID": [], "relation": [ { "biotoolsID": "Diamond", "type": "uses" }, { "biotoolsID": "BLAST", "type": "uses" } ], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_0291", "term": "Sequence clustering" }, { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" } ], "input": [ { "data": { "uri": "http://edamontology.org/data_2976", "term": "Protein sequence" }, "format": [ { "uri": "http://edamontology.org/format_1929", "term": "FASTA" } ] } ], "output": [ { "data": { "uri": "http://edamontology.org/data_2048", "term": "Report" }, "format": [ { "uri": "http://edamontology.org/format_3475", "term": "TSV" }, { "uri": "http://edamontology.org/format_2331", "term": "HTML" }, { "uri": "http://edamontology.org/format_2332", "term": "XML" } ] } ], "note": null, "cmd": "proteinortho input/*.faa" } ], "toolType": [ "Command-line tool", "Workflow" ], "topic": [ { "uri": "http://edamontology.org/topic_0797", "term": "Comparative genomics" } ], "operatingSystem": [ "Linux", "Mac", "Windows" ], "language": [ "Perl", "C++", "Python" ], "license": "GPL-2.0", "collectionID": [], "maturity": "Mature", "cost": "Free of charge", "accessibility": "Open access", "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [ { "url": "https://gitlab.com/paulklemm_PHD/proteinortho", "type": [ "Repository" ], "note": null }, { "url": "https://gitlab.com/paulklemm_PHD/proteinortho/-/issues?sort=created_date&state=opened", "type": [ "Issue tracker" ], "note": null }, { "url": "https://toolshed.g2.bx.psu.edu/repository?repository_id=584d8accff31aefe", "type": [ "Galaxy service" ], "note": null } ], "download": [ { "url": "https://gitlab.com/paulklemm_PHD/proteinortho/-/archive/master/proteinortho-master.zip", "type": "Source code", "note": "Download and unpack, compile with `make all`", "version": "latest" }, { "url": "https://packages.debian.org/unstable/proteinortho", "type": "Downloads page", "note": "Installation with dpkg (root privileges are required)", "version": null }, { "url": "https://anaconda.org/bioconda/proteinortho", "type": "Downloads page", "note": "conda install proteinortho", "version": null }, { "url": "https://formulae.brew.sh/formula/proteinortho", "type": "Downloads page", "note": "brew install proteinortho", "version": null } ], "documentation": [ { "url": "https://gitlab.com/paulklemm_PHD/proteinortho/-/releases", "type": [ "Release notes" ], "note": null }, { "url": "https://gitlab.com/paulklemm_PHD/proteinortho/-/wikis/home", "type": [ "FAQ" ], "note": null }, { "url": "https://gitlab.com/paulklemm_PHD/proteinortho", "type": [ "General" ], "note": null } ], "publication": [ { "doi": "10.3389/fbinf.2023.1322477", "pmid": null, "pmcid": null, "type": [ "Primary" ], "version": "version 6", "note": "For the version 6 of proteinortho", "metadata": { "title": "Proteinortho6: pseudo-reciprocal best alignment heuristic for graph-based detection of (co-)orthologs", "abstract": "Proteinortho is a widely used tool to predict (co)-orthologous groups of genes for any set of species. It finds application in comparative and functional genomics, phylogenomics, and evolutionary reconstructions. With a rapidly increasing number of available genomes, the demand for large-scale predictions is also growing. In this contribution, we evaluate and implement major algorithmic improvements that significantly enhance the speed of the analysis without reducing precision. Graph-based detection of (co-)orthologs is typically based on a reciprocal best alignment heuristic that requires an all vs. all comparison of proteins from all species under study. The initial identification of similar proteins is accelerated by introducing an alternative search tool along with a revised search strategy—the pseudo-reciprocal best alignment heuristic—that reduces the number of required sequence comparisons by one-half. The clustering algorithm was reworked to efficiently decompose very large clusters and accelerate processing. Proteinortho6 reduces the overall processing time by an order of magnitude compared to its predecessor while maintaining its small memory footprint and good predictive quality.", "date": "2023-01-01T00:00:00Z", "citationCount": 0, "authors": [ { "name": "Klemm P." }, { "name": "Stadler P.F." }, { "name": "Lechner M." } ], "journal": "Frontiers in Bioinformatics" } }, { "doi": "10.1186/1471-2105-12-124", "pmid": "21526987", "pmcid": "PMC3114741", "type": [], "version": "version 4 to 5", "note": "For version 4 to 5 of proteinortho", "metadata": { "title": "Proteinortho: Detection of (Co-)orthologs in large-scale analysis", "abstract": "Background: Orthology analysis is an important part of data analysis in many areas of bioinformatics such as comparative genomics and molecular phylogenetics. The ever-increasing flood of sequence data, and hence the rapidly increasing number of genomes that can be compared simultaneously, calls for efficient software tools as brute-force approaches with quadratic memory requirements become infeasible in practise. The rapid pace at which new data become available, furthermore, makes it desirable to compute genome-wide orthology relations for a given dataset rather than relying on relations listed in databases.Results: The program Proteinortho described here is a stand-alone tool that is geared towards large datasets and makes use of distributed computing techniques when run on multi-core hardware. It implements an extended version of the reciprocal best alignment heuristic. We apply Proteinortho to compute orthologous proteins in the complete set of all 717 eubacterial genomes available at NCBI at the beginning of 2009. We identified thirty proteins present in 99% of all bacterial proteomes.Conclusions: Proteinortho significantly reduces the required amount of memory for orthology analysis compared to existing tools, allowing such computations to be performed on off-the-shelf hardware. © 2011 Lechner et al; licensee BioMed Central Ltd.", "date": "2011-04-28T00:00:00Z", "citationCount": 799, "authors": [ { "name": "Lechner M." }, { "name": "Findeiss S." }, { "name": "Steiner L." }, { "name": "Marz M." }, { "name": "Stadler P.F." }, { "name": "Prohaska S.J." } ], "journal": "BMC Bioinformatics" } }, { "doi": "10.1371/journal.pone.0105015", "pmid": null, "pmcid": null, "type": [ "Other" ], "version": null, "note": "The synteny extension PoFF (-syteny option)", "metadata": { "title": "Orthology detection combining clustering and synteny for very large datasets", "abstract": "The elucidation of orthology relationships is an important step both in gene function prediction as well as towards understanding patterns of sequence evolution. Orthology assignments are usually derived directly from sequence similarities for large data because more exact approaches exhibit too high computational costs. Here we present PoFF, an extension for the standalone tool Proteinortho, which enhances orthology detection by combining clustering, sequence similarity, and synteny. In the course of this work, FFAdj-MCS, a heuristic that assesses pairwise gene order using adjacencies (a similarity measure related to the breakpoint distance) was adapted to support multiple linear chromosomes and extended to detect duplicated regions. PoFF largely reduces the number of false positives and enables more fine-grained predictions than purely similarity-based approaches. The extension maintains the low memory requirements and the efficient concurrency options of its basis Proteinortho, making the software applicable to very large datasets. © 2014 Lechner et al.", "date": "2014-08-19T00:00:00Z", "citationCount": 67, "authors": [ { "name": "Lechner M." }, { "name": "Hernandez-Rosales M." }, { "name": "Doerr D." }, { "name": "Wieseke N." }, { "name": "Thevenin A." }, { "name": "Stoye J." }, { "name": "Hartmann R.K." }, { "name": "Prohaska S.J." }, { "name": "Stadler P.F." } ], "journal": "PLoS ONE" } } ], "credit": [ { "name": "Marcus Lechner", "email": null, "url": null, "orcidid": null, "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Person", "typeRole": [ "Primary contact", "Maintainer" ], "note": null }, { "name": "Paul Klemm", "email": null, "url": "https://gitlab.com/paulklemm", "orcidid": "https://orcid.org/0000-0002-3609-5713", "gridid": null, "rorid": null, "fundrefid": null, "typeEntity": "Person", "typeRole": [ "Maintainer" ], "note": null } ], "community": null, "owner": "klemmp", "additionDate": "2022-03-22T18:57:49.937151Z", "lastUpdate": "2024-03-13T22:17:22.229211Z", "editPermission": { "type": "private", "authors": [] }, "validated": 1, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null }, { "name": "nucleosome_prediction", "description": "Prediction of Nucleosomes Positions on the Genome", "homepage": "https://genie.weizmann.ac.il/software/nucleo_exe.html", "biotoolsID": "nucleosome_prediction", "biotoolsCURIE": "biotools:nucleosome_prediction", "version": [ "3.0" ], "otherID": [], "relation": [], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_2423", "term": "Prediction and recognition" }, { "uri": "http://edamontology.org/operation_0432", "term": "Nucleosome position prediction" }, { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" } ], "input": [], "output": [], "note": null, "cmd": null } ], "toolType": [ "Command-line tool" ], "topic": [ { "uri": "http://edamontology.org/topic_0122", "term": "Structural genomics" }, { "uri": "http://edamontology.org/topic_3511", "term": "Nucleic acid sites, features and motifs" } ], "operatingSystem": [], "language": [ "Perl" ], "license": null, "collectionID": [], "maturity": "Mature", "cost": "Free of charge", "accessibility": "Open access", "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [], "download": [], "documentation": [], "publication": [], "credit": [], "community": null, "owner": "rlibouban", "additionDate": "2024-03-13T09:49:24.750390Z", "lastUpdate": "2024-03-13T09:49:24.753027Z", "editPermission": { "type": "public", "authors": [] }, "validated": 0, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null }, { "name": "lorikeet", "description": "Tools for M. tuberculosis DNA fingerprinting (spoligotyping)", "homepage": "https://github.com/AbeelLab/lorikeet", "biotoolsID": "lorikeet", "biotoolsCURIE": "biotools:lorikeet", "version": [ "20" ], "otherID": [], "relation": [], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" }, { "uri": "http://edamontology.org/operation_3196", "term": "Genotyping" } ], "input": [], "output": [], "note": null, "cmd": null } ], "toolType": [ "Bioinformatics portal" ], "topic": [ { "uri": "http://edamontology.org/topic_0625", "term": "Genotype and phenotype" } ], "operatingSystem": [], "language": [ "Java" ], "license": "GPL-3.0", "collectionID": [], "maturity": "Mature", "cost": null, "accessibility": "Open access", "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [], "download": [], "documentation": [], "publication": [], "credit": [], "community": null, "owner": "rlibouban", "additionDate": "2024-03-12T10:56:44.977314Z", "lastUpdate": "2024-03-12T10:56:44.980036Z", "editPermission": { "type": "public", "authors": [] }, "validated": 0, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null }, { "name": "BBTools", "description": "BBTools is a suite of fast, multithreaded bioinformatics tools designed for analysis of DNA and RNA sequence data. BBTools can handle common sequencing file formats such as fastq, fasta, sam, scarf, fasta+qual, compressed or raw, with autodetection of quality encoding and interleaving. It is written in Java and works on any platform supporting Java, including Linux, MacOS, and Microsoft Windows and Linux; there are no dependencies other than Java (version 7 or higher). Program descriptions and options are shown when running the shell scripts with no parameters.\n\nBBTools is open source and free for unlimited use, and is used regularly by DOE JGI and other institutions around the world.", "homepage": "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/", "biotoolsID": "bbtools", "biotoolsCURIE": "biotools:bbtools", "version": [], "otherID": [], "relation": [], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" } ], "input": [], "output": [], "note": null, "cmd": null } ], "toolType": [ "Suite" ], "topic": [ { "uri": "http://edamontology.org/topic_0080", "term": "Sequence analysis" } ], "operatingSystem": [], "language": [], "license": null, "collectionID": [], "maturity": null, "cost": null, "accessibility": null, "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [], "download": [], "documentation": [], "publication": [], "credit": [], "community": null, "owner": "paulzierep", "additionDate": "2024-03-12T09:39:30.275596Z", "lastUpdate": "2024-03-12T09:39:30.279447Z", "editPermission": { "type": "private", "authors": [] }, "validated": 0, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null }, { "name": "legsta", "description": "Performs in silico Legionella pneumophila sequence based typing", "homepage": "https://github.com/tseemann/legsta", "biotoolsID": "legsta", "biotoolsCURIE": "biotools:legsta", "version": [ "0.5.1", "0.3.7", "0.3.2", "0.2" ], "otherID": [], "relation": [], "function": [ { "operation": [ { "uri": "http://edamontology.org/operation_2403", "term": "Sequence analysis" } ], "input": [], "output": [], "note": null, "cmd": null } ], "toolType": [ "Bioinformatics portal", "Command-line tool" ], "topic": [ { "uri": "http://edamontology.org/topic_3305", "term": "Public health and epidemiology" } ], "operatingSystem": [], "language": [], "license": "GPL-3.0", "collectionID": [], "maturity": "Mature", "cost": "Free of charge", "accessibility": "Open access", "elixirPlatform": [], "elixirNode": [], "elixirCommunity": [], "link": [], "download": [], "documentation": [], "publication": [], "credit": [], "community": null, "owner": "rlibouban", "additionDate": "2024-03-11T16:40:14.136794Z", "lastUpdate": "2024-03-11T16:40:14.139298Z", "editPermission": { "type": "public", "authors": [] }, "validated": 0, "homepage_status": 0, "elixir_badge": 0, "confidence_flag": null } ] }{ "count": 411, "next": "?page=2", "previous": null, "list": [ { "name": "TrieDedup", "description": "A fast trie-based deduplication algorithm to handle ambiguous bases in high-throughput sequencing.", "homepage": "