2024 |
Waterhouse, Robert M; Adam-Blondon, Anne-Françoise; Balech, Bachir; Barta, Endre; Chua, Physilia Ying Shi; Di Cola, Valeria; Heil, Katharina F; Hughes, Graham M; Jermiin, Lars S; Kalaš, Matúš; Lanfear, Jerry; Pafilis, Evangelos; Palagi, Patricia M; Papageorgiou, Aristotelis C; Paupério, Joana; Psomopoulos, Fotis; Raes, Niels; Burgin, Josephine; Gabaldón, Toni The ELIXIR Biodiversity Community: Understanding short- and long-term changes in biodiversity Journal Article F1000Research, 12 , pp. 499, 2024, ISSN: 2046-1402. @article{waterhouse_elixir_2024, title = {The ELIXIR Biodiversity Community: Understanding short- and long-term changes in biodiversity}, author = {Robert M Waterhouse and Anne-Françoise Adam-Blondon and Bachir Balech and Endre Barta and Physilia Ying Shi Chua and Di Cola, Valeria and Katharina F Heil and Graham M Hughes and Lars S Jermiin and Matúš Kalaš and Jerry Lanfear and Evangelos Pafilis and Patricia M Palagi and Aristotelis C Papageorgiou and Joana Paupério and Fotis Psomopoulos and Niels Raes and Josephine Burgin and Toni Gabaldón}, url = {https://f1000research.com/articles/12-499/v2 https://imbbc.hcmr.gr/wp-content/uploads/2024/07/2024-Waterhouse-F1000-Research-34.pdf}, doi = {10.12688/f1000research.133724.2}, issn = {2046-1402}, year = {2024}, date = {2024-07-16}, urldate = {2024-07-15}, journal = {F1000Research}, volume = {12}, pages = {499}, abstract = {Biodiversity loss is now recognised as one of the major challenges for humankind to address over the next few decades. Unless major actions are taken, the sixth mass extinction will lead to catastrophic effects on the Earth’s biosphere and human health and well-being. ELIXIR can help address the technical challenges of biodiversity science, through leveraging its suite of services and expertise to enable data management and analysis activities that enhance our understanding of life on Earth and facilitate biodiversity preservation and restoration. 
This white paper, prepared by the ELIXIR Biodiversity Community, summarises the current status and responses, and presents a set of plans, both technical and community-oriented, that should both enhance how ELIXIR Services are applied in the biodiversity field and how ELIXIR builds connections across the many other infrastructures active in this area. We discuss the areas of highest priority, how they can be implemented in cooperation with the ELIXIR Platforms, and their connections to existing ELIXIR Communities and international consortia. The article provides a preliminary blueprint for a Biodiversity Community in ELIXIR and is an appeal to identify and involve new stakeholders.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Biodiversity loss is now recognised as one of the major challenges for humankind to address over the next few decades. Unless major actions are taken, the sixth mass extinction will lead to catastrophic effects on the Earth’s biosphere and human health and well-being. ELIXIR can help address the technical challenges of biodiversity science, through leveraging its suite of services and expertise to enable data management and analysis activities that enhance our understanding of life on Earth and facilitate biodiversity preservation and restoration. This white paper, prepared by the ELIXIR Biodiversity Community, summarises the current status and responses, and presents a set of plans, both technical and community-oriented, that should both enhance how ELIXIR Services are applied in the biodiversity field and how ELIXIR builds connections across the many other infrastructures active in this area. We discuss the areas of highest priority, how they can be implemented in cooperation with the ELIXIR Platforms, and their connections to existing ELIXIR Communities and international consortia. 
The article provides a preliminary blueprint for a Biodiversity Community in ELIXIR and is an appeal to identify and involve new stakeholders. |
Aplakidou, Eleni; Vergoulidis, Nikolaos; Chasapi, Maria; Venetsianou, Nefeli K; Kokoli, Maria; Panagiotopoulou, Eleni; Iliopoulos, Ioannis; Karatzas, Evangelos; Pafilis, Evangelos; Georgakopoulos-Soares, Ilias; Kyrpides, Nikos C; Pavlopoulos, Georgios A; Baltoumas, Fotis A Visualizing metagenomic and metatranscriptomic data: A comprehensive review Journal Article Computational and Structural Biotechnology Journal, 23 , pp. 2011–2033, 2024, ISSN: 2001-0370. @article{aplakidou_visualizing_2024, title = {Visualizing metagenomic and metatranscriptomic data: A comprehensive review}, author = {Eleni Aplakidou and Nikolaos Vergoulidis and Maria Chasapi and Nefeli K Venetsianou and Maria Kokoli and Eleni Panagiotopoulou and Ioannis Iliopoulos and Evangelos Karatzas and Evangelos Pafilis and Ilias Georgakopoulos-Soares and Nikos C Kyrpides and Georgios A Pavlopoulos and Fotis A Baltoumas}, url = {https://linkinghub.elsevier.com/retrieve/pii/S2001037024001430 https://imbbc.hcmr.gr/wp-content/uploads/2024/05/2024-Aplakidou-CSB-Journal-25.pdf}, doi = {10.1016/j.csbj.2024.04.060}, issn = {2001-0370}, year = {2024}, date = {2024-05-14}, urldate = {2024-05-16}, journal = {Computational and Structural Biotechnology Journal}, volume = {23}, pages = {2011--2033}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2023 |
Zafeiropoulos, Haris; Beracochea, Martin; Ninidakis, Stelios; Exter, Katrina; Potirakis, Antonis; De Moro, Gianluca; Richardson, Lorna; Corre, Erwan; Machado, João; Pafilis, Evangelos; Kotoulas, Georgios; Santi, Ioulia; Finn, Robert D; Cox, Cymon J; Pavloudi, Christina metaGOflow: a workflow for the analysis of marine Genomic Observatories shotgun metagenomics data Journal Article GigaScience, 12 , pp. giad078, 2023, ISSN: 2047-217X. @article{zafeiropoulos_metagoflow_2022, title = {metaGOflow: a workflow for the analysis of marine Genomic Observatories shotgun metagenomics data}, author = {Haris Zafeiropoulos and Martin Beracochea and Stelios Ninidakis and Katrina Exter and Antonis Potirakis and De Moro, Gianluca and Lorna Richardson and Erwan Corre and João Machado and Evangelos Pafilis and Georgios Kotoulas and Ioulia Santi and Robert D Finn and Cymon J Cox and Christina Pavloudi}, url = {https://imbbc.hcmr.gr/wp-content/uploads/2023/10/2023-Zafeiropoulos-GiGa-57.pdf https://academic.oup.com/gigascience/article/doi/10.1093/gigascience/giad078/7321054}, doi = {10.1093/gigascience/giad078}, issn = {2047-217X}, year = {2023}, date = {2023-10-19}, urldate = {2023-10-23}, journal = {GigaScience}, volume = {12}, pages = {giad078}, abstract = {Abstract Background Genomic Observatories (GOs) are sites of long-term scientific study that undertake regular assessments of the genomic biodiversity. The European Marine Omics Biodiversity Observation Network (EMO BON) is a network of GOs that conduct regular biological community samplings to generate environmental and metagenomic data of microbial communities from designated marine stations around Europe. The development of an effective workflow is essential for the analysis of the EMO BON metagenomic data in a timely and reproducible manner. Findings Based on the established MGnify resource, we developed metaGOflow. 
metaGOflow supports the fast inference of taxonomic profiles from GO-derived data based on ribosomal RNA genes and their functional annotation using the raw reads. Thanks to the Research Object Crate packaging, relevant metadata about the sample under study, and the details of the bioinformatics analysis it has been subjected to, are inherited to the data product while its modular implementation allows running the workflow partially. The analysis of 2 EMO BON samples and 1 Tara Oceans sample was performed as a use case. Conclusions metaGOflow is an efficient and robust workflow that scales to the needs of projects producing big metagenomic data such as EMO BON. It highlights how containerization technologies along with modern workflow languages and metadata package approaches can support the needs of researchers when dealing with ever-increasing volumes of biological data. Despite being initially oriented to address the needs of EMO BON, metaGOflow is a flexible and easy-to-use workflow that can be broadly used for one-sample-at-a-time analysis of shotgun metagenomics data.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract Background Genomic Observatories (GOs) are sites of long-term scientific study that undertake regular assessments of the genomic biodiversity. The European Marine Omics Biodiversity Observation Network (EMO BON) is a network of GOs that conduct regular biological community samplings to generate environmental and metagenomic data of microbial communities from designated marine stations around Europe. The development of an effective workflow is essential for the analysis of the EMO BON metagenomic data in a timely and reproducible manner. Findings Based on the established MGnify resource, we developed metaGOflow. metaGOflow supports the fast inference of taxonomic profiles from GO-derived data based on ribosomal RNA genes and their functional annotation using the raw reads. 
Thanks to the Research Object Crate packaging, relevant metadata about the sample under study, and the details of the bioinformatics analysis it has been subjected to, are inherited to the data product while its modular implementation allows running the workflow partially. The analysis of 2 EMO BON samples and 1 Tara Oceans sample was performed as a use case. Conclusions metaGOflow is an efficient and robust workflow that scales to the needs of projects producing big metagenomic data such as EMO BON. It highlights how containerization technologies along with modern workflow languages and metadata package approaches can support the needs of researchers when dealing with ever-increasing volumes of biological data. Despite being initially oriented to address the needs of EMO BON, metaGOflow is a flexible and easy-to-use workflow that can be broadly used for one-sample-at-a-time analysis of shotgun metagenomics data. |
Luoma, Jouni; Nastou, Katerina; Ohta, Tomoko; Toivonen, Harttu; Pafilis, Evangelos; Jensen, Lars Juhl; Pyysalo, Sampo S1000: a better taxonomic name corpus for biomedical information extraction Journal Article Bioinformatics, 39 (6), pp. btad369, 2023, ISSN: 1367-4811. @article{luoma_s1000_2023, title = {S1000: a better taxonomic name corpus for biomedical information extraction}, author = {Jouni Luoma and Katerina Nastou and Tomoko Ohta and Harttu Toivonen and Evangelos Pafilis and Lars Juhl Jensen and Sampo Pyysalo}, editor = {Zhiyong Lu}, url = {https://imbbc.hcmr.gr/wp-content/uploads/2023/07/2023-Luoma-et-al.-2023-Luoma-37.pdf https://academic.oup.com/bioinformatics/article/doi/10.1093/bioinformatics/btad369/7192170}, doi = {10.1093/bioinformatics/btad369}, issn = {1367-4811}, year = {2023}, date = {2023-07-14}, urldate = {2023-07-14}, journal = {Bioinformatics}, volume = {39}, number = {6}, pages = {btad369}, abstract = {Abstract Motivation The recognition of mentions of species names in text is a critically important task for biomedical text mining. While deep learning-based methods have made great advances in many named entity recognition tasks, results for species name recognition remain poor. We hypothesize that this is primarily due to the lack of appropriate corpora. Results We introduce the S1000 corpus, a comprehensive manual re-annotation and extension of the S800 corpus. We demonstrate that S1000 makes highly accurate recognition of species names possible (F-score =93.1%), both for deep learning and dictionary-based methods. Availability and implementation All resources introduced in this study are available under open licenses from https://jensenlab.org/resources/s1000/. 
The webpage contains links to a Zenodo project and three GitHub repositories associated with the study.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract Motivation The recognition of mentions of species names in text is a critically important task for biomedical text mining. While deep learning-based methods have made great advances in many named entity recognition tasks, results for species name recognition remain poor. We hypothesize that this is primarily due to the lack of appropriate corpora. Results We introduce the S1000 corpus, a comprehensive manual re-annotation and extension of the S800 corpus. We demonstrate that S1000 makes highly accurate recognition of species names possible (F-score =93.1%), both for deep learning and dictionary-based methods. Availability and implementation All resources introduced in this study are available under open licenses from https://jensenlab.org/resources/s1000/. The webpage contains links to a Zenodo project and three GitHub repositories associated with the study. |
Kokoli, Maria; Karatzas, Evangelos; Baltoumas, Fotis A; Schneider, Reinhard; Pafilis, Evangelos; Paragkamian, Savvas; Doncheva, Nadezhda T; Jensen, Lars Juhl; Pavlopoulos, Georgios A Arena3Dweb: interactive 3D visualization of multilayered networks supporting multiple directional information channels, clustering analysis and application integration Journal Article NAR Genomics and Bioinformatics, 5 (2), pp. lqad053, 2023, ISSN: 2631-9268. @article{kokoli_arena3dweb_2023, title = {Arena3Dweb: interactive 3D visualization of multilayered networks supporting multiple directional information channels, clustering analysis and application integration}, author = {Maria Kokoli and Evangelos Karatzas and Fotis A Baltoumas and Reinhard Schneider and Evangelos Pafilis and Savvas Paragkamian and Nadezhda T Doncheva and Lars Juhl Jensen and Georgios A Pavlopoulos}, url = {https://imbbc.hcmr.gr/wp-content/uploads/2023/06/2023-Kokoli-GenBioinformatics-31.pdf https://academic.oup.com/nargab/article/doi/10.1093/nargab/lqad053/7185857}, doi = {10.1093/nargab/lqad053}, issn = {2631-9268}, year = {2023}, date = {2023-06-19}, urldate = {2023-06-19}, journal = {NAR Genomics and Bioinformatics}, volume = {5}, number = {2}, pages = {lqad053}, abstract = {Abstract Arena3Dweb is an interactive web tool that visualizes multi-layered networks in 3D space. In this update, Arena3Dweb supports directed networks as well as up to nine different types of connections between pairs of nodes with the use of Bézier curves. It comes with different color schemes (light/gray/dark mode), custom channel coloring, four node clustering algorithms which one can run on-the-fly, visualization in VR mode and predefined layer layouts (zig-zag, star and cube). This update also includes enhanced navigation controls (mouse orbit controls, layer dragging and layer/node selection), while its newly developed API allows integration with external applications as well as saving and loading of sessions in JSON format. Finally, a dedicated Cytoscape app has been developed, through which users can automatically send their 2D networks from Cytoscape to Arena3Dweb for 3D multi-layer visualization. 
Arena3Dweb is accessible at http://arena3d.pavlopouloslab.info or http://arena3d.org}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract Arena3Dweb is an interactive web tool that visualizes multi-layered networks in 3D space. In this update, Arena3Dweb supports directed networks as well as up to nine different types of connections between pairs of nodes with the use of Bézier curves. It comes with different color schemes (light/gray/dark mode), custom channel coloring, four node clustering algorithms which one can run on-the-fly, visualization in VR mode and predefined layer layouts (zig-zag, star and cube). This update also includes enhanced navigation controls (mouse orbit controls, layer dragging and layer/node selection), while its newly developed API allows integration with external applications as well as saving and loading of sessions in JSON format. Finally, a dedicated Cytoscape app has been developed, through which users can automatically send their 2D networks from Cytoscape to Arena3Dweb for 3D multi-layer visualization. Arena3Dweb is accessible at http://arena3d.pavlopouloslab.info or http://arena3d.org |
Baltoumas, Fotis A; Karatzas, Evangelos; Paez-Espino, David; Venetsianou, Nefeli K; Aplakidou, Eleni; Oulas, Anastasis; Finn, Robert D; Ovchinnikov, Sergey; Pafilis, Evangelos; Kyrpides, Nikos C; Pavlopoulos, Georgios A Exploring microbial functional biodiversity at the protein family level—From metagenomic sequence reads to annotated protein clusters Journal Article Frontiers in Bioinformatics, 3 , pp. 1157956, 2023, ISSN: 2673-7647. @article{baltoumas_exploring_2023, title = {Exploring microbial functional biodiversity at the protein family level—From metagenomic sequence reads to annotated protein clusters}, author = {Fotis A Baltoumas and Evangelos Karatzas and David Paez-Espino and Nefeli K Venetsianou and Eleni Aplakidou and Anastasis Oulas and Robert D Finn and Sergey Ovchinnikov and Evangelos Pafilis and Nikos C Kyrpides and Georgios A Pavlopoulos}, url = {https://imbbc.hcmr.gr/wp-content/uploads/2023/03/2023-Baltoumas-FroΒionform-14.pdf https://www.frontiersin.org/articles/10.3389/fbinf.2023.1157956/full}, doi = {10.3389/fbinf.2023.1157956}, issn = {2673-7647}, year = {2023}, date = {2023-03-08}, urldate = {2023-03-08}, journal = {Frontiers in Bioinformatics}, volume = {3}, pages = {1157956}, abstract = {Metagenomics has enabled accessing the genetic repertoire of natural microbial communities. Metagenome shotgun sequencing has become the method of choice for studying and classifying microorganisms from various environments. To this end, several methods have been developed to process and analyze the sequence data from raw reads to end-products such as predicted protein sequences or families. In this article, we provide a thorough review to simplify such processes and discuss the alternative methodologies that can be followed in order to explore biodiversity at the protein family level. We provide details for analysis tools and we comment on their scalability as well as their advantages and disadvantages. 
Finally, we report the available data repositories and recommend various approaches for protein family annotation related to phylogenetic distribution, structure prediction and metadata enrichment.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Metagenomics has enabled accessing the genetic repertoire of natural microbial communities. Metagenome shotgun sequencing has become the method of choice for studying and classifying microorganisms from various environments. To this end, several methods have been developed to process and analyze the sequence data from raw reads to end-products such as predicted protein sequences or families. In this article, we provide a thorough review to simplify such processes and discuss the alternative methodologies that can be followed in order to explore biodiversity at the protein family level. We provide details for analysis tools and we comment on their scalability as well as their advantages and disadvantages. Finally, we report the available data repositories and recommend various approaches for protein family annotation related to phylogenetic distribution, structure prediction and metadata enrichment. |
2022 |
Zafeiropoulos, Haris; Paragkamian, Savvas; Ninidakis, Stelios; Pavlopoulos, Georgios A; Jensen, Lars Juhl; Pafilis, Evangelos PREGO: A Literature and Data-Mining Resource to Associate Microorganisms, Biological Processes, and Environment Types Journal Article Microorganisms, 10 (2), pp. 293, 2022, ISSN: 2076-2607. @article{zafeiropoulos_prego_2022, title = {PREGO: A Literature and Data-Mining Resource to Associate Microorganisms, Biological Processes, and Environment Types}, author = {Haris Zafeiropoulos and Savvas Paragkamian and Stelios Ninidakis and Georgios A Pavlopoulos and Lars Juhl Jensen and Evangelos Pafilis}, url = {https://imbbc.hcmr.gr/wp-content/uploads/2022/03/2022-Zafeiropoulos-Micro-12.pdf https://www.mdpi.com/2076-2607/10/2/293}, doi = {10.3390/microorganisms10020293}, issn = {2076-2607}, year = {2022}, date = {2022-01-01}, urldate = {2022-03-11}, journal = {Microorganisms}, volume = {10}, number = {2}, pages = {293}, abstract = {To elucidate ecosystem functioning, it is fundamental to recognize what processes occur in which environments (where) and which microorganisms carry them out (who). Here, we present PREGO, a one-stop-shop knowledge base providing such associations. PREGO combines text mining and data integration techniques to mine such what-where-who associations from data and metadata scattered in the scientific literature and in public omics repositories. Microorganisms, biological processes, and environment types are identified and mapped to ontology terms from established community resources. Analyses of comentions in text and co-occurrences in metagenomics data/metadata are performed to extract associations and a level of confidence is assigned to each of them thanks to a scoring scheme. The PREGO knowledge base contains associations for 364,508 microbial taxa, 1090 environmental types, 15,091 biological processes, and 7971 molecular functions with a total of almost 58 million associations. 
These associations are available through a web portal, an Application Programming Interface (API), and bulk download. By exploring environments and/or processes associated with each other or with microbes, PREGO aims to assist researchers in design and interpretation of experiments and their results. To demonstrate PREGO’s capabilities, a thorough presentation of its web interface is given along with a meta-analysis of experimental results from a lagoon-sediment study of sulfur-cycle related microbes.}, keywords = {}, pubstate = {published}, tppubtype = {article} } To elucidate ecosystem functioning, it is fundamental to recognize what processes occur in which environments (where) and which microorganisms carry them out (who). Here, we present PREGO, a one-stop-shop knowledge base providing such associations. PREGO combines text mining and data integration techniques to mine such what-where-who associations from data and metadata scattered in the scientific literature and in public omics repositories. Microorganisms, biological processes, and environment types are identified and mapped to ontology terms from established community resources. Analyses of comentions in text and co-occurrences in metagenomics data/metadata are performed to extract associations and a level of confidence is assigned to each of them thanks to a scoring scheme. The PREGO knowledge base contains associations for 364,508 microbial taxa, 1090 environmental types, 15,091 biological processes, and 7971 molecular functions with a total of almost 58 million associations. These associations are available through a web portal, an Application Programming Interface (API), and bulk download. By exploring environments and/or processes associated with each other or with microbes, PREGO aims to assist researchers in design and interpretation of experiments and their results. 
To demonstrate PREGO’s capabilities, a thorough presentation of its web interface is given along with a meta-analysis of experimental results from a lagoon-sediment study of sulfur-cycle related microbes. |
Paragkamian, Savvas; Sarafidou, Georgia; Mavraki, Dimitra; Pavloudi, Christina; Beja, Joana; Eliezer, Menashè; Lipizer, Marina; Boicenco, Laura; Vandepitte, Leen; Perez-Perez, Ruben; Zafeiropoulos, Haris; Arvanitidis, Christos; Pafilis, Evangelos; Gerovasileiou, Vasilis Automating the Curation Process of Historical Literature on Marine Biodiversity Using Text Mining: The DECO Workflow Journal Article Frontiers in Marine Science, 9 , pp. 940844, 2022, ISSN: 2296-7745. @article{paragkamian_automating_2022, title = {Automating the Curation Process of Historical Literature on Marine Biodiversity Using Text Mining: The DECO Workflow}, author = {Savvas Paragkamian and Georgia Sarafidou and Dimitra Mavraki and Christina Pavloudi and Joana Beja and Menashè Eliezer and Marina Lipizer and Laura Boicenco and Leen Vandepitte and Ruben Perez-Perez and Haris Zafeiropoulos and Christos Arvanitidis and Evangelos Pafilis and Vasilis Gerovasileiou}, url = {https://imbbc.hcmr.gr/wp-content/uploads/2022/07/2022-Paragkaminan-fmars-53.pdf }, doi = {10.3389/fmars.2022.940844}, issn = {2296-7745}, year = {2022}, date = {2022-01-01}, urldate = {2022-07-29}, journal = {Frontiers in Marine Science}, volume = {9}, pages = {940844}, abstract = {Historical biodiversity documents comprise an important link to the long-term data life cycle and provide useful insights on several aspects of biodiversity research and management. However, because of their historical context, they present specific challenges, primarily time- and effort-consuming in data curation. The data rescue process requires a multidisciplinary effort involving four tasks: (a) Document digitisation (b) Transcription, which involves text recognition and correction, and (c) Information Extraction, which is performed using text mining tools and involves the entity identification, their normalisation and their co-mentions in text. Finally, the extracted data go through (d) Publication to a data repository in a standardised format. 
Each of these tasks requires a dedicated multistep methodology with standards and procedures. During the past 8 years, Information Extraction (IE) tools have undergone remarkable advances, which created a landscape of various tools with distinct capabilities specific to biodiversity data. These tools recognise entities in text such as taxon names, localities, phenotypic traits and thus automate, accelerate and facilitate the curation process. Furthermore, they assist the normalisation and mapping of entities to specific identifiers. This work focuses on the IE step (c) from the marine historical biodiversity data perspective. It orchestrates IE tools and provides the curators with a unified view of the methodology; as a result the documentation of the strengths, limitations and dependencies of several tools was drafted. Additionally, the classification of tools into Graphical User Interface (web and standalone) applications and Command Line Interface ones enables the data curators to select the most suitable tool for their needs, according to their specific features. In addition, the high volume of already digitised marine documents that await curation is amassed and a demonstration of the methodology, with a new scalable, extendable and containerised tool, “DECO” (bioDivErsity data Curation programming wOrkflow) is presented. DECO’s usage will provide a solid basis for future curation initiatives and an augmented degree of reliability towards high value data products that allow for the connection between the past and the present, in marine biodiversity research.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Historical biodiversity documents comprise an important link to the long-term data life cycle and provide useful insights on several aspects of biodiversity research and management. However, because of their historical context, they present specific challenges, primarily time- and effort-consuming in data curation. 
The data rescue process requires a multidisciplinary effort involving four tasks: (a) Document digitisation (b) Transcription, which involves text recognition and correction, and (c) Information Extraction, which is performed using text mining tools and involves the entity identification, their normalisation and their co-mentions in text. Finally, the extracted data go through (d) Publication to a data repository in a standardised format. Each of these tasks requires a dedicated multistep methodology with standards and procedures. During the past 8 years, Information Extraction (IE) tools have undergone remarkable advances, which created a landscape of various tools with distinct capabilities specific to biodiversity data. These tools recognise entities in text such as taxon names, localities, phenotypic traits and thus automate, accelerate and facilitate the curation process. Furthermore, they assist the normalisation and mapping of entities to specific identifiers. This work focuses on the IE step (c) from the marine historical biodiversity data perspective. It orchestrates IE tools and provides the curators with a unified view of the methodology; as a result the documentation of the strengths, limitations and dependencies of several tools was drafted. Additionally, the classification of tools into Graphical User Interface (web and standalone) applications and Command Line Interface ones enables the data curators to select the most suitable tool for their needs, according to their specific features. In addition, the high volume of already digitised marine documents that await curation is amassed and a demonstration of the methodology, with a new scalable, extendable and containerised tool, “DECO” (bioDivErsity data Curation programming wOrkflow) is presented. 
DECO’s usage will provide a solid basis for future curation initiatives and an augmented degree of reliability towards high value data products that allow for the connection between the past and the present, in marine biodiversity research. |
2021 |
Baltoumas, Fotis A; Zafeiropoulou, Sofia; Karatzas, Evangelos; Paragkamian, Savvas; Thanati, Foteini; Iliopoulos, Ioannis; Eliopoulos, Aristides G; Schneider, Reinhard; Jensen, Lars Juhl; Pafilis, Evangelos; Pavlopoulos, Georgios A OnTheFly2.0: a text-mining web application for automated biomedical entity recognition, document annotation, network and functional enrichment analysis Journal Article NAR Genomics and Bioinformatics, 3 (4), pp. lqab090, 2021, ISSN: 2631-9268. @article{baltoumas_onthefly20_2021, title = {OnTheFly2.0: a text-mining web application for automated biomedical entity recognition, document annotation, network and functional enrichment analysis}, author = {Fotis A Baltoumas and Sofia Zafeiropoulou and Evangelos Karatzas and Savvas Paragkamian and Foteini Thanati and Ioannis Iliopoulos and Aristides G Eliopoulos and Reinhard Schneider and Lars Juhl Jensen and Evangelos Pafilis and Georgios A Pavlopoulos}, url = {https://imbbc.hcmr.gr/wp-content/uploads/2021/12/2021-Baltoumas-NAR-74.pdf https://academic.oup.com/nargab/article/doi/10.1093/nargab/lqab090/6382333}, doi = {10.1093/nargab/lqab090}, issn = {2631-9268}, year = {2021}, date = {2021-10-01}, urldate = {2021-12-01}, journal = {NAR Genomics and Bioinformatics}, volume = {3}, number = {4}, pages = {lqab090}, abstract = {Abstract Extracting and processing information from documents is of great importance as lots of experimental results and findings are stored in local files. Therefore, extracting and analyzing biomedical terms from such files in an automated way is absolutely necessary. In this article, we present OnTheFly2.0, a web application for extracting biomedical entities from individual files such as plain texts, office documents, PDF files or images. OnTheFly2.0 can generate informative summaries in popup windows containing knowledge related to the identified terms along with links to various databases. It uses the EXTRACT tagging service to perform named entity recognition (NER) for genes/proteins, chemical compounds, organisms, tissues, environments, diseases, phenotypes and gene ontology terms. 
Multiple files can be analyzed, whereas identified terms such as proteins or genes can be explored through functional enrichment analysis or be associated with diseases and PubMed entries. Finally, protein–protein and protein–chemical networks can be generated with the use of STRING and STITCH services. To demonstrate its capacity for knowledge discovery, we interrogated published meta-analyses of clinical biomarkers of severe COVID-19 and uncovered inflammatory and senescence pathways that impact disease pathogenesis. OnTheFly2.0 currently supports 197 species and is available at http://bib.fleming.gr:3838/OnTheFly/ and http://onthefly.pavlopouloslab.info.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract Extracting and processing information from documents is of great importance as lots of experimental results and findings are stored in local files. Therefore, extracting and analyzing biomedical terms from such files in an automated way is absolutely necessary. In this article, we present OnTheFly2.0, a web application for extracting biomedical entities from individual files such as plain texts, office documents, PDF files or images. OnTheFly2.0 can generate informative summaries in popup windows containing knowledge related to the identified terms along with links to various databases. It uses the EXTRACT tagging service to perform named entity recognition (NER) for genes/proteins, chemical compounds, organisms, tissues, environments, diseases, phenotypes and gene ontology terms. Multiple files can be analyzed, whereas identified terms such as proteins or genes can be explored through functional enrichment analysis or be associated with diseases and PubMed entries. Finally, protein–protein and protein–chemical networks can be generated with the use of STRING and STITCH services. 
To demonstrate its capacity for knowledge discovery, we interrogated published meta-analyses of clinical biomarkers of severe COVID-19 and uncovered inflammatory and senescence pathways that impact disease pathogenesis. OnTheFly2.0 currently supports 197 species and is available at http://bib.fleming.gr:3838/OnTheFly/ and http://onthefly.pavlopouloslab.info. |
Vangay, Pajau; Burgin, Josephine; Johnston, Anjanette; Beck, Kristen L; Berrios, Daniel C; Blumberg, Kai; Canon, Shane; Chain, Patrick; Chandonia, John-Marc; Christianson, Danielle; Costes, Sylvain V; Damerow, Joan; Duncan, William D; Dundore-Arias, Jose Pablo; Fagnan, Kjiersten; Galazka, Jonathan M; Gibbons, Sean M; Hays, David; Hervey, Judson; Hu, Bin; Hurwitz, Bonnie L; Jaiswal, Pankaj; Joachimiak, Marcin P; Kinkel, Linda; Ladau, Joshua; Martin, Stanton L; McCue, Lee Ann; Miller, Kayd; Mouncey, Nigel; Mungall, Chris; Pafilis, Evangelos; Reddy, T B K; Richardson, Lorna; Roux, Simon; Shaffer, Justin P; Sundaramurthi, Jagadish Chandrabose; Thompson, Luke R; Timme, Ruth E; Zheng, Jie; Wood-Charlson, Elisha M; Eloe-Fadrosh, Emiley A Microbiome Metadata Standards: Report of the National Microbiome Data Collaborative’s Workshop and Follow-On Activities Journal Article mSystems, 6 (1), pp. e01194-20, 2021.
@article{vangay_microbiome_2021,
  title     = {Microbiome Metadata Standards: Report of the National Microbiome Data Collaborative’s Workshop and Follow-On Activities},
  author    = {Pajau Vangay and Josephine Burgin and Anjanette Johnston and Kristen L Beck and Daniel C Berrios and Kai Blumberg and Shane Canon and Patrick Chain and John-Marc Chandonia and Danielle Christianson and Sylvain V Costes and Joan Damerow and William D Duncan and Jose Pablo Dundore-Arias and Kjiersten Fagnan and Jonathan M Galazka and Sean M Gibbons and David Hays and Judson Hervey and Bin Hu and Bonnie L Hurwitz and Pankaj Jaiswal and Marcin P Joachimiak and Linda Kinkel and Joshua Ladau and Stanton L Martin and Lee Ann McCue and Kayd Miller and Nigel Mouncey and Chris Mungall and Evangelos Pafilis and T B K Reddy and Lorna Richardson and Simon Roux and Justin P Shaffer and Jagadish Chandrabose Sundaramurthi and Luke R Thompson and Ruth E Timme and Jie Zheng and Elisha M Wood-Charlson and Emiley A Eloe-Fadrosh},
  url       = {https://imbbc.hcmr.gr/wp-content/uploads/2021/03/2021-Vangay-Pafilis-mSystems-19-1.pdf},
  doi       = {10.1128/mSystems.01194-20},
  year      = {2021},
  date      = {2021-02-23},
  journal   = {mSystems},
  volume    = {6},
  number    = {1},
  pages     = {e01194-20},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Zafeiropoulos, Haris; Gioti, Anastasia; Ninidakis, Stelios; Potirakis, Antonis; Paragkamian, Savvas; Angelova, Nelina; Antoniou, Aglaia; Danis, Theodoros; Kaitetzidou, Eliza; Kasapidis, Panagiotis; Kristoffersen, Jon Bent; Papadogiannis, Vasileios; Pavloudi, Christina; Ha, Quoc Viet; Lagnel, Jacques; Pattakos, Nikos; Perantinos, Giorgos; Sidirokastritis, Dimitris; Vavilis, Panagiotis; Kotoulas, Georgios; Manousaki, Tereza; Sarropoulou, Elena; Tsigenopoulos, Costas S; Arvanitidis, Christos; Magoulas, Antonios; Pafilis, Evangelos 0s and 1s in marine molecular research: a regional HPC perspective Journal Article GigaScience, 10 (8), pp. giab053, 2021, ISSN: 2047-217X.
@article{zafeiropoulos_0s_2021,
  title     = {0s and 1s in marine molecular research: a regional HPC perspective},
  author    = {Haris Zafeiropoulos and Anastasia Gioti and Stelios Ninidakis and Antonis Potirakis and Savvas Paragkamian and Nelina Angelova and Aglaia Antoniou and Theodoros Danis and Eliza Kaitetzidou and Panagiotis Kasapidis and Jon Bent Kristoffersen and Vasileios Papadogiannis and Christina Pavloudi and Quoc Viet Ha and Jacques Lagnel and Nikos Pattakos and Giorgos Perantinos and Dimitris Sidirokastritis and Panagiotis Vavilis and Georgios Kotoulas and Tereza Manousaki and Elena Sarropoulou and Costas S Tsigenopoulos and Christos Arvanitidis and Antonios Magoulas and Evangelos Pafilis},
  url       = {https://imbbc.hcmr.gr/wp-content/uploads/2021/08/2021-Zafeiropoulos-GiGa-63.pdf https://academic.oup.com/gigascience/article/doi/10.1093/gigascience/giab053/6353916},
  doi       = {10.1093/gigascience/giab053},
  issn      = {2047-217X},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-08-23},
  journal   = {GigaScience},
  volume    = {10},
  number    = {8},
  pages     = {giab053},
  abstract  = {High-performance computing (HPC) systems have become indispensable for modern marine research, providing support to an increasing number and diversity of users. Pairing with the impetus offered by high-throughput methods to key areas such as non-model organism studies, their operation continuously evolves to meet the corresponding computational challenges. Here, we present a Tier 2 (regional) HPC facility, operating for over a decade at the Institute of Marine Biology, Biotechnology, and Aquaculture of the Hellenic Centre for Marine Research in Greece. Strategic choices made in design and upgrades aimed to strike a balance between depth (the need for a few high-memory nodes) and breadth (a number of slimmer nodes), as dictated by the idiosyncrasy of the supported research. Qualitative computational requirement analysis of the latter revealed the diversity of marine fields, methods, and approaches adopted to translate data into knowledge. In addition, hardware and software architectures, usage statistics, policy, and user management aspects of the facility are presented. Drawing upon the last decade’s experience from the different levels of operation of the Institute of Marine Biology, Biotechnology, and Aquaculture HPC facility, a number of lessons are presented; these have contributed to the facility’s future directions in light of emerging distribution technologies (e.g., containers) and Research Infrastructure evolution. In combination with detailed knowledge of the facility usage and its upcoming upgrade, future collaborations in marine research and beyond are envisioned.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
High-performance computing (HPC) systems have become indispensable for modern marine research, providing support to an increasing number and diversity of users. Pairing with the impetus offered by high-throughput methods to key areas such as non-model organism studies, their operation continuously evolves to meet the corresponding computational challenges. Here, we present a Tier 2 (regional) HPC facility, operating for over a decade at the Institute of Marine Biology, Biotechnology, and Aquaculture of the Hellenic Centre for Marine Research in Greece. Strategic choices made in design and upgrades aimed to strike a balance between depth (the need for a few high-memory nodes) and breadth (a number of slimmer nodes), as dictated by the idiosyncrasy of the supported research. Qualitative computational requirement analysis of the latter revealed the diversity of marine fields, methods, and approaches adopted to translate data into knowledge. In addition, hardware and software architectures, usage statistics, policy, and user management aspects of the facility are presented. Drawing upon the last decade’s experience from the different levels of operation of the Institute of Marine Biology, Biotechnology, and Aquaculture HPC facility, a number of lessons are presented; these have contributed to the facility’s future directions in light of emerging distribution technologies (e.g., containers) and Research Infrastructure evolution. In combination with detailed knowledge of the facility usage and its upcoming upgrade, future collaborations in marine research and beyond are envisioned. |
2020 |
Zafeiropoulos, Haris; Ha, Quoc Viet; Vasileiadou, Katerina; Potirakis, Antonis; Arvanitidis, Christos; Topalis, Pantelis; Pavloudi, Christina; Pafilis, Evangelos PEMA: a flexible Pipeline for Environmental DNA Metabarcoding Analysis of the 16S/18S ribosomal RNA, ITS, and COI marker genes Journal Article GigaScience, 9 (3), pp. giaa022, 2020, ISSN: 2047-217X, (_eprint: https://academic.oup.com/gigascience/article-pdf/9/3/giaa022/32894405/giaa022.pdf).
@article{zafeiropoulos_pema_2020,
  title     = {PEMA: a flexible Pipeline for Environmental DNA Metabarcoding Analysis of the 16S/18S ribosomal RNA, ITS, and COI marker genes},
  author    = {Haris Zafeiropoulos and Ha, Quoc Viet and Katerina Vasileiadou and Antonis Potirakis and Christos Arvanitidis and Pantelis Topalis and Christina Pavloudi and Evangelos Pafilis},
  url       = {https://doi.org/10.1093/gigascience/giaa022},
  doi       = {10.1093/gigascience/giaa022},
  issn      = {2047-217X},
  year      = {2020},
  date      = {2020-01-01},
  journal   = {GigaScience},
  volume    = {9},
  number    = {3},
  pages     = {giaa022},
  abstract  = {Environmental DNA and metabarcoding allow the identification of a mixture of species and launch a new era in bio- and eco-assessment. Many steps are required to obtain taxonomically assigned matrices from raw data. For most of these, a plethora of tools are available; each tool's execution parameters need to be tailored to reflect each experiment's idiosyncrasy. Adding to this complexity, the computation capacity of high-performance computing systems is frequently required for such analyses. To address the difficulties, bioinformatic pipelines need to combine state-of-the-art technologies and algorithms with an easy to get-set-use framework, allowing researchers to tune each study. Software containerization technologies ease the sharing and running of software packages across operating systems; thus, they strongly facilitate pipeline development and usage. Likewise programming languages specialized for big data pipelines incorporate features like roll-back checkpoints and on-demand partial pipeline execution. PEMA is a containerized assembly of key metabarcoding analysis tools that requires low effort in setting up, running, and customizing to researchers’ needs. Based on third-party tools, PEMA performs read pre-processing, (molecular) operational taxonomic unit clustering, amplicon sequence variant inference, and taxonomy assignment for 16S and 18S ribosomal RNA, as well as ITS and COI marker gene data. Owing to its simplified parameterization and checkpoint support, PEMA allows users to explore alternative algorithms for specific steps of the pipeline without the need of a complete re-execution. PEMA was evaluated against both mock communities and previously published datasets and achieved results of comparable quality. A high-performance computing–based approach was used to develop PEMA; however, it can be used in personal computers as well. PEMA's time-efficient performance and good results will allow it to be used for accurate environmental DNA metabarcoding analysis, thus enhancing the applicability of next-generation biodiversity assessment studies.},
  note      = {_eprint: https://academic.oup.com/gigascience/article-pdf/9/3/giaa022/32894405/giaa022.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Environmental DNA and metabarcoding allow the identification of a mixture of species and launch a new era in bio- and eco-assessment. Many steps are required to obtain taxonomically assigned matrices from raw data. For most of these, a plethora of tools are available; each tool's execution parameters need to be tailored to reflect each experiment's idiosyncrasy. Adding to this complexity, the computation capacity of high-performance computing systems is frequently required for such analyses. To address the difficulties, bioinformatic pipelines need to combine state-of-the-art technologies and algorithms with an easy to get-set-use framework, allowing researchers to tune each study. Software containerization technologies ease the sharing and running of software packages across operating systems; thus, they strongly facilitate pipeline development and usage. Likewise programming languages specialized for big data pipelines incorporate features like roll-back checkpoints and on-demand partial pipeline execution. PEMA is a containerized assembly of key metabarcoding analysis tools that requires low effort in setting up, running, and customizing to researchers’ needs. Based on third-party tools, PEMA performs read pre-processing, (molecular) operational taxonomic unit clustering, amplicon sequence variant inference, and taxonomy assignment for 16S and 18S ribosomal RNA, as well as ITS and COI marker gene data. Owing to its simplified parameterization and checkpoint support, PEMA allows users to explore alternative algorithms for specific steps of the pipeline without the need of a complete re-execution. PEMA was evaluated against both mock communities and previously published datasets and achieved results of comparable quality. A high-performance computing–based approach was used to develop PEMA; however, it can be used in personal computers as well. PEMA's time-efficient performance and good results will allow it to be used for accurate environmental DNA metabarcoding analysis, thus enhancing the applicability of next-generation biodiversity assessment studies. |
2019 |
Chasapi, Anastasia; Aivaliotis, Michalis; Angelis, Lefteris; Chanalaris, Anastasios; Iliopoulos, Ioannis; Kappas, Ilias; Karapiperis, Christos; Kyrpides, Nikos C; Pafilis, Evangelos; Panteris, Eleftherios; Topalis, Pantelis; Tsiamis, George; Vizirianakis, Ioannis S; Vlassi, Metaxia; Promponas, Vasilis J; Ouzounis, Christos A Establishment of computational biology in Greece and Cyprus: Past, present, and future Journal Article PLOS Computational Biology, 15 (12), pp. e1007532, 2019, ISSN: 1553-7358, (BIODIV).
@article{chasapi_establishment_2019,
  author    = {Chasapi, Anastasia and Aivaliotis, Michalis and Angelis, Lefteris and Chanalaris, Anastasios and Iliopoulos, Ioannis and Kappas, Ilias and Karapiperis, Christos and Kyrpides, Nikos C and Pafilis, Evangelos and Panteris, Eleftherios and Topalis, Pantelis and Tsiamis, George and Vizirianakis, Ioannis S and Vlassi, Metaxia and Promponas, Vasilis J and Ouzounis, Christos A},
  editor    = {Papin, Jason A},
  title     = {Establishment of computational biology in Greece and Cyprus: Past, present, and future},
  journal   = {PLOS Computational Biology},
  volume    = {15},
  number    = {12},
  pages     = {e1007532},
  year      = {2019},
  date      = {2019-12-01},
  issn      = {1553-7358},
  doi       = {10.1371/journal.pcbi.1007532},
  url       = {https://dx.plos.org/10.1371/journal.pcbi.1007532},
  urldate   = {2022-01-17},
  note      = {BIODIV},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
2016 |
Faulwetter, Sarah; Pafilis, Evangelos; Fanini, Lucia; Bailly, Nicolas; Agosti, Donat; Arvanitidis, Christos; Boicenco, Laura; Catapano, Terry; Claus, Simon; Dekeyzer, Stefanie; Georgiev, Teodor; Legaki, Aglaia; Mavraki, Dimitra; Oulas, Anastasis; Papastefanou, Gabriella; Penev, Lyubomir; Sautter, Guido; Schigel, Dmitry; Senderov, Viktor; Teaca, Adrian; Tsompanou, Marilena EMODnet Workshop on mechanisms and guidelines to mobilise historical data into biogeographic databases Journal Article Research Ideas and Outcomes, 2 , pp. e9774, 2016, ISSN: 2367-7163.
@article{faulwetter_emodnet_2016,
  title     = {EMODnet Workshop on mechanisms and guidelines to mobilise historical data into biogeographic databases},
  author    = {Sarah Faulwetter and Evangelos Pafilis and Lucia Fanini and Nicolas Bailly and Donat Agosti and Christos Arvanitidis and Laura Boicenco and Terry Catapano and Simon Claus and Stefanie Dekeyzer and Teodor Georgiev and Aglaia Legaki and Dimitra Mavraki and Anastasis Oulas and Gabriella Papastefanou and Lyubomir Penev and Guido Sautter and Dmitry Schigel and Viktor Senderov and Adrian Teaca and Marilena Tsompanou},
  url       = {http://rio.pensoft.net/articles.php?id=9774},
  doi       = {10.3897/rio.2.e9774},
  issn      = {2367-7163},
  year      = {2016},
  date      = {2016-07-01},
  urldate   = {2020-09-21},
  journal   = {Research Ideas and Outcomes},
  volume    = {2},
  pages     = {e9774},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Vasileiadou, K; Pavloudi, C; Kalantzi, I; Apostolaki, E T; Chatzigeorgiou, G; Chatzinikolaou, E; Pafilis, E; Papageorgiou, N; Fanini, L; Konstas, S; Fragopoulou, N; Arvanitidis, C Environmental variability and heavy metal concentrations from five lagoons in the Ionian Sea (Amvrakikos Gulf, W Greece) Journal Article Biodiversity Data Journal, 4 (1), pp. e8233, 2016, ISSN: 1314-2828, (Publisher: Pensoft Publishers).
@article{vasileiadou_environmental_2016,
  title     = {Environmental variability and heavy metal concentrations from five lagoons in the Ionian Sea (Amvrakikos Gulf, W Greece)},
  author    = {K Vasileiadou and C Pavloudi and I Kalantzi and E T Apostolaki and G Chatzigeorgiou and E Chatzinikolaou and E Pafilis and N Papageorgiou and L Fanini and S Konstas and N Fragopoulou and C Arvanitidis},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85018621471&doi=10.3897%2fBDJ.4.e8233&partnerID=40&md5=91104fbd24f94063ee946df8c2486376},
  doi       = {10.3897/BDJ.4.e8233},
  issn      = {1314-2828},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {Biodiversity Data Journal},
  volume    = {4},
  number    = {1},
  pages     = {e8233},
  abstract  = {Background: Coastal lagoons are ecosystems of major importance as they host a number of species tolerant to disturbances and they are highly productive. Therefore, these ecosystems should be protected to ensure stability and resilience. The lagoons of Amvrakikos Gulf form one of the most important lagoonal complexes in Greece. The optimal ecological status of these lagoons is crucial for the well-being of the biodiversity and the economic prosperity of the local communities. Thus, monitoring of the area is necessary to detect possible sources of disturbance and restore stability. New information: The environmental variables and heavy metals concentrations, from five lagoons of Amvrakikos Gulf were measured from seasonal samplings and compared to the findings of previous studies in the area, in order to check for possible sources of disturbance. The analysis showed that i) the values of the abiotic parameters vary with time (season), space (lagoon) and with space over time; ii) the variability of the environmental factors and enrichment in certain elements is naturally induced and no source of contamination is detected in the lagoons. © Vasileiadou K et al.},
  note      = {Publisher: Pensoft Publishers},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Background: Coastal lagoons are ecosystems of major importance as they host a number of species tolerant to disturbances and they are highly productive. Therefore, these ecosystems should be protected to ensure stability and resilience. The lagoons of Amvrakikos Gulf form one of the most important lagoonal complexes in Greece. The optimal ecological status of these lagoons is crucial for the well-being of the biodiversity and the economic prosperity of the local communities. Thus, monitoring of the area is necessary to detect possible sources of disturbance and restore stability. New information: The environmental variables and heavy metals concentrations, from five lagoons of Amvrakikos Gulf were measured from seasonal samplings and compared to the findings of previous studies in the area, in order to check for possible sources of disturbance. The analysis showed that i) the values of the abiotic parameters vary with time (season), space (lagoon) and with space over time; ii) the variability of the environmental factors and enrichment in certain elements is naturally induced and no source of contamination is detected in the lagoons. © Vasileiadou K et al. |
Pafilis, E; Buttigieg, P L; Ferrell, B; Pereira, E; Schnetzer, J; Arvanitidis, C; Jensen, L J EXTRACT: Interactive extraction of environment metadata and term suggestion for metagenomic sample annotation Journal Article Database, 2016 , pp. baw005, 2016, ISSN: 1758-0463, (Publisher: Oxford University Press).
@article{pafilis_extract_2016,
  title     = {EXTRACT: Interactive extraction of environment metadata and term suggestion for metagenomic sample annotation},
  author    = {E Pafilis and P L Buttigieg and B Ferrell and E Pereira and J Schnetzer and C Arvanitidis and L J Jensen},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85052188662&doi=10.1093%2fdatabase%2fbaw005&partnerID=40&md5=9d2901b712db55c2768dcd9c3d5cfbc3},
  doi       = {10.1093/database/baw005},
  issn      = {1758-0463},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {Database},
  volume    = {2016},
  pages     = {baw005},
  abstract  = {The microbial and molecular ecology research communities have made substantial progress on developing standards for annotating samples with environment metadata. However, sample manual annotation is a highly labor intensive process and requires familiarity with the terminologies used. We have therefore developed an interactive annotation tool, EXTRACT, which helps curators identify and extract standard-compliant terms for annotation of metagenomic records and other samples. Behind its web-based user interface, the system combines published methods for named entity recognition of environment, organism, tissue and disease terms. The evaluators in the BioCreative V Interactive Annotation Task found the system to be intuitive, useful, well documented and sufficiently accurate to be helpful in spotting relevant text passages and extracting organism and environment terms. Comparison of fully manual and text-mining-assisted curation revealed that EXTRACT speeds up annotation by 15-25% and helps curators to detect terms that would otherwise have been missed. © The Author(s) 2016.},
  note      = {Publisher: Oxford University Press},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
The microbial and molecular ecology research communities have made substantial progress on developing standards for annotating samples with environment metadata. However, sample manual annotation is a highly labor intensive process and requires familiarity with the terminologies used. We have therefore developed an interactive annotation tool, EXTRACT, which helps curators identify and extract standard-compliant terms for annotation of metagenomic records and other samples. Behind its web-based user interface, the system combines published methods for named entity recognition of environment, organism, tissue and disease terms. The evaluators in the BioCreative V Interactive Annotation Task found the system to be intuitive, useful, well documented and sufficiently accurate to be helpful in spotting relevant text passages and extracting organism and environment terms. Comparison of fully manual and text-mining-assisted curation revealed that EXTRACT speeds up annotation by 15-25% and helps curators to detect terms that would otherwise have been missed. © The Author(s) 2016. |
Varsos, C; Patkos, T; Oulas, A; Pavloudi, C; Gougousis, A; Ijaz, U Z; Filiopoulou, I; Pattakos, N; Vanden Berghe, E; Fernández-Guerra, A; Faulwetter, S; Chatzinikolaou, E; Pafilis, E; Bekiari, C; Doerr, M; Arvanitidis, C Optimized R functions for analysis of ecological community data using the R virtual laboratory (RvLab) Journal Article Biodiversity Data Journal, 4 (1), pp. e8357, 2016, ISSN: 1314-2828, (Publisher: Pensoft Publishers).
@article{varsos_optimized_2016,
  title     = {Optimized R functions for analysis of ecological community data using the R virtual laboratory (RvLab)},
  author    = {C Varsos and T Patkos and A Oulas and C Pavloudi and A Gougousis and U Z Ijaz and I Filiopoulou and N Pattakos and Vanden Berghe, E and A Fernández-Guerra and S Faulwetter and E Chatzinikolaou and E Pafilis and C Bekiari and M Doerr and C Arvanitidis},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85018640003&doi=10.3897%2fBDJ.4.e8357&partnerID=40&md5=6003e9caf6582fdde7f4bffec91e154f},
  doi       = {10.3897/BDJ.4.e8357},
  issn      = {1314-2828},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {Biodiversity Data Journal},
  volume    = {4},
  number    = {1},
  pages     = {e8357},
  abstract  = {Background: Parallel data manipulation using R has previously been addressed by members of the R community, however most of these studies produce ad hoc solutions that are not readily available to the average R user. Our targeted users, ranging from the expert ecologist/microbiologists to computational biologists, often experience difficulties in finding optimal ways to exploit the full capacity of their computational resources. In addition, improving performance of commonly used R scripts becomes increasingly difficult especially with large datasets. Furthermore, the implementations described here can be of significant interest to expert bioinformaticians or R developers. Therefore, our goals can be summarized as: (i) description of a complete methodology for the analysis of large datasets by combining capabilities of diverse R packages, (ii) presentation of their application through a virtual R laboratory (RvLab) that makes execution of complex functions and visualization of results easy and readily available to the end-user. New information: In this paper, the novelty stems from implementations of parallel methodologies which rely on the processing of data on different levels of abstraction and the availability of these processes through an integrated portal. Parallel implementation R packages, such as the pbdMPI (Programming with Big Data - Interface to MPI) package, are used to implement Single Program Multiple Data (SPMD) parallelization on primitive mathematical operations, allowing for interplay with functions of the vegan package. The dplyr and RPostgreSQL R packages are further integrated offering connections to dataframe like objects (databases) as secondary storage solutions whenever memory demands exceed available RAM resources. The RvLab is running on a PC cluster, using version 3.1.2 (2014-10-31) on a x86_64-pc-linux-gnu (64-bit) platform, and offers an intuitive virtual environment interface enabling users to perform analysis of ecological and microbial communities based on optimized vegan functions. A beta version of the RvLab is available after registration at: https://portal.lifewatchgreece.eu/.},
  note      = {Publisher: Pensoft Publishers},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Background: Parallel data manipulation using R has previously been addressed by members of the R community, however most of these studies produce ad hoc solutions that are not readily available to the average R user. Our targeted users, ranging from the expert ecologist/microbiologists to computational biologists, often experience difficulties in finding optimal ways to exploit the full capacity of their computational resources. In addition, improving performance of commonly used R scripts becomes increasingly difficult especially with large datasets. Furthermore, the implementations described here can be of significant interest to expert bioinformaticians or R developers. Therefore, our goals can be summarized as: (i) description of a complete methodology for the analysis of large datasets by combining capabilities of diverse R packages, (ii) presentation of their application through a virtual R laboratory (RvLab) that makes execution of complex functions and visualization of results easy and readily available to the end-user. New information: In this paper, the novelty stems from implementations of parallel methodologies which rely on the processing of data on different levels of abstraction and the availability of these processes through an integrated portal. Parallel implementation R packages, such as the pbdMPI (Programming with Big Data - Interface to MPI) package, are used to implement Single Program Multiple Data (SPMD) parallelization on primitive mathematical operations, allowing for interplay with functions of the vegan package. The dplyr and RPostgreSQL R packages are further integrated offering connections to dataframe like objects (databases) as secondary storage solutions whenever memory demands exceed available RAM resources. The RvLab is running on a PC cluster, using version 3.1.2 (2014-10-31) on a x86_64-pc-linux-gnu (64-bit) platform, and offers an intuitive virtual environment interface enabling users to perform analysis of ecological and microbial communities based on optimized vegan functions. A beta version of the RvLab is available after registration at: https://portal.lifewatchgreece.eu/. |
Wang, Q; Abdul, S S; Almeida, L; Ananiadou, S; Balderas-Martínez, Y I; Batista-Navarro, R; Campos, D; Chilton, L; Chou, H-J; Contreras, G; Cooper, L; Dai, H-J; Ferrell, B; Fluck, J; Gama-Castro, S; George, N; Gkoutos, G; Irin, A K; Jensen, L J; Jimenez, S; Jue, T R; Keseler, I; Madan, S; Matos, S; McQuilton, P; Milacic, M; Mort, M; Natarajan, J; Pafilis, E; Pereira, E; Rao, S; Rinaldi, F; Rothfels, K; Salgado, D; Silva, R M; Singh, O; Stefancsik, R; Su, C-H; Subramani, S; Tadepally, H D; Tsaprouni, L; Vasilevsky, N; Wang, X; Chatr-Aryamontri, A; Laulederkind, S J F; Matis-Mitchell, S; McEntyre, J; Orchard, S; Pundir, S; Rodriguez-Esteban, R; Van Auken, K; Lu, Z; Schaeffer, M; Wu, C H; Hirschman, L; Arighi, C N Overview of the interactive task in BioCreative V Journal Article Database, 2016 , pp. baw119, 2016, ISSN: 1758-0463, (Publisher: Oxford University Press).
@article{wang_overview_2016,
  title     = {Overview of the interactive task in BioCreative V},
  author    = {Q Wang and S S Abdul and L Almeida and S Ananiadou and Y I Balderas-Martínez and R Batista-Navarro and D Campos and L Chilton and H-J Chou and G Contreras and L Cooper and H-J Dai and B Ferrell and J Fluck and S Gama-Castro and N George and G Gkoutos and A K Irin and L J Jensen and S Jimenez and T R Jue and I Keseler and S Madan and S Matos and P McQuilton and M Milacic and M Mort and J Natarajan and E Pafilis and E Pereira and S Rao and F Rinaldi and K Rothfels and D Salgado and R M Silva and O Singh and R Stefancsik and C-H Su and S Subramani and H D Tadepally and L Tsaprouni and N Vasilevsky and X Wang and A Chatr-Aryamontri and S J F Laulederkind and S Matis-Mitchell and J McEntyre and S Orchard and S Pundir and R Rodriguez-Esteban and Van Auken, K and Z Lu and M Schaeffer and C H Wu and L Hirschman and C N Arighi},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85009926974&doi=10.1093%2fdatabase%2fbaw119&partnerID=40&md5=402da6f94c190eed16badbee3f086daf},
  doi       = {10.1093/database/baw119},
  issn      = {1758-0463},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {Database},
  volume    = {2016},
  pages     = {baw119},
  abstract  = {Fully automated text mining (TM) systems promote efficient literature searching, retrieval, and review but are not sufficient to produce ready-to-consume curated documents. These systems are not meant to replace biocurators, but instead to assist them in one or more literature curation steps. To do so, the user interface is an important aspect that needs to be considered for tool adoption. The BioCreative Interactive task (IAT) is a track designed for exploring user-system interactions, promoting development of useful TM tools, and providing a communication channel between the biocuration and the TM communities. In BioCreative V, the IAT track followed a format similar to previous interactive tracks, where the utility and usability of TM tools, as well as the generation of use cases, have been the focal points. The proposed curation tasks are user-centric and formally evaluated by biocurators. In BioCreative V IAT, seven TM systems and 43 biocurators participated. Two levels of user participation were offered to broaden curator involvement and obtain more feedback on usability aspects. The full level participation involved training on the system, curation of a set of documents with and without TM assistance, tracking of time-on-task, and completion of a user survey. The partial level participation was designed to focus on usability aspects of the interface and not the performance per se. In this case, biocurators navigated the system by performing predesigned tasks and then were asked whether they were able to achieve the task and the level of difficulty in completing the task. In this manuscript, we describe the development of the interactive task, from planning to execution and discuss major findings for the systems tested.},
  note      = {Publisher: Oxford University Press},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Fully automated text mining (TM) systems promote efficient literature searching, retrieval, and review but are not sufficient to produce ready-to-consume curated documents. These systems are not meant to replace biocurators, but instead to assist them in one or more literature curation steps. To do so, the user interface is an important aspect that needs to be considered for tool adoption. The BioCreative Interactive task (IAT) is a track designed for exploring user-system interactions, promoting development of useful TM tools, and providing a communication channel between the biocuration and the TM communities. In BioCreative V, the IAT track followed a format similar to previous interactive tracks, where the utility and usability of TM tools, as well as the generation of use cases, have been the focal points. The proposed curation tasks are user-centric and formally evaluated by biocurators. In BioCreative V IAT, seven TM systems and 43 biocurators participated. Two levels of user participation were offered to broaden curator involvement and obtain more feedback on usability aspects. The full level participation involved training on the system, curation of a set of documents with and without TM assistance, tracking of time-on-task, and completion of a user survey. The partial level participation was designed to focus on usability aspects of the interface and not the performance per se. In this case, biocurators navigated the system by performing predesigned tasks and then were asked whether they were able to achieve the task and the level of difficulty in completing the task. In this manuscript, we describe the development of the interactive task, from planning to execution and discuss major findings for the systems tested. |
Sinclair, L; Ijaz, U Z; Jensen, L J; Coolen, M J L; Gubry-Rangin, C; Chroňáková, A; Oulas, A; Pavloudi, C; Schnetzer, J; Weimann, A; Ijaz, A; Eiler, A; Quince, C; Pafilis, E Seqenv: Linking sequences to environments through text mining Journal Article PeerJ, 2016 (12), 2016, ISSN: 2167-8359, (Publisher: PeerJ Inc.).
@article{sinclair_seqenv_2016,
  title     = {Seqenv: Linking sequences to environments through text mining},
  author    = {Sinclair, L and Ijaz, U Z and Jensen, L J and Coolen, M J L and Gubry-Rangin, C and Chroňáková, A and Oulas, A and Pavloudi, C and Schnetzer, J and Weimann, A and Ijaz, A and Eiler, A and Quince, C and Pafilis, E},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85007364069&doi=10.7717%2fpeerj.2690&partnerID=40&md5=d6fb9302c27b05c3c656dcf2f8aa9512},
  doi       = {10.7717/peerj.2690},
  issn      = {2167-8359},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {PeerJ},
  volume    = {2016},
  number    = {12},
  abstract  = {Understanding the distribution of taxa and associated traits across different environments is one of the central questions in microbial ecology. High-throughput sequencing (HTS) studies are presently generating huge volumes of data to address this biogeographical topic. However, these studies are often focused on specific environment types or processes leading to the production of individual, unconnected datasets. The large amounts of legacy sequence data with associated metadata that exist can be harnessed to better place the genetic information found in these surveys into a wider environmental context. Here we introduce a software program, seqenv, to carry out precisely such a task. It automatically performs similarity searches of short sequences against the “nt” nucleotide database provided by NCBI and, out of every hit, extracts—if it is available—the textual metadata field. After collecting all the isolation sources from all the search results, we run a text mining algorithm to identify and parse words that are associated with the Environmental Ontology (EnvO) controlled vocabulary. This, in turn, enables us to determine both in which environments individual sequences or taxa have previously been observed and, by weighted summation of those results, to summarize complete samples. We present two demonstrative applications of seqenv to a survey of ammonia oxidizing archaea as well as to a plankton paleome dataset from the Black Sea. These demonstrate the ability of the tool to reveal novel patterns in HTS and its utility in the fields of environmental source tracking, paleontology, and studies of microbial biogeography. To install seqenv, go to: https://github.com/xapple/seqenv. © 2016 Sinclair et al.},
  note      = {Publisher: PeerJ Inc.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Understanding the distribution of taxa and associated traits across different environments is one of the central questions in microbial ecology. High-throughput sequencing (HTS) studies are presently generating huge volumes of data to address this biogeographical topic. However, these studies are often focused on specific environment types or processes leading to the production of individual, unconnected datasets. The large amounts of legacy sequence data with associated metadata that exist can be harnessed to better place the genetic information found in these surveys into a wider environmental context. Here we introduce a software program, seqenv, to carry out precisely such a task. It automatically performs similarity searches of short sequences against the “nt” nucleotide database provided by NCBI and, out of every hit, extracts—if it is available—the textual metadata field. After collecting all the isolation sources from all the search results, we run a text mining algorithm to identify and parse words that are associated with the Environmental Ontology (EnvO) controlled vocabulary. This, in turn, enables us to determine both in which environments individual sequences or taxa have previously been observed and, by weighted summation of those results, to summarize complete samples. We present two demonstrative applications of seqenv to a survey of ammonia oxidizing archaea as well as to a plankton paleome dataset from the Black Sea. These demonstrate the ability of the tool to reveal novel patterns in HTS and its utility in the fields of environmental source tracking, paleontology, and studies of microbial biogeography. To install seqenv, go to: https://github.com/xapple/seqenv. © 2016 Sinclair et al. |
ten Hoopen, P; Amid, C; Buttigieg, P L; Pafilis, E; Bravakos, P; Cerdeño-Tárraga, A M; Gibson, R; Kahlke, T; Legaki, A; Murthy, K N; Papastefanou, G; Pereira, E; Rossello, M; Toribio, A L; Cochrane, G Value, but high costs in post-deposition data curation Journal Article Database, 2016 , 2016, ISSN: 1758-0463, (Publisher: Oxford University Press).
@article{hoopen_value_2016,
  title     = {Value, but high costs in post-deposition data curation},
  author    = {ten Hoopen, P and Amid, C and Buttigieg, P L and Pafilis, E and Bravakos, P and Cerdeño-Tárraga, A M and Gibson, R and Kahlke, T and Legaki, A and Murthy, K N and Papastefanou, G and Pereira, E and Rossello, M and Toribio, A L and Cochrane, G},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84969922176&doi=10.1093%2fdatabase%2fbav126&partnerID=40&md5=b3a55db8e3812454067026d468aa02dc},
  doi       = {10.1093/database/bav126},
  issn      = {1758-0463},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {Database},
  volume    = {2016},
  abstract  = {Discoverability of sequence data in primary data archives is proportional to the richness of contextual information associated with the data. Here, we describe an exercise in the improvement of contextual information surrounding sample records associated with metagenomics sequence reads available in the European Nucleotide Archive. We outline the annotation process and summarize findings of this effort aimed at increasing usability of publicly available environmental data. Furthermore, we emphasize the benefits of such an exercise and detail its costs. We conclude that such a third party annotation approach is expensive and has value as an element of curation, but should form only part of a more sustainable submitter-driven approach. © The Author(s) 2016. Published by Oxford University Press.},
  note      = {Publisher: Oxford University Press},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Discoverability of sequence data in primary data archives is proportional to the richness of contextual information associated with the data. Here, we describe an exercise in the improvement of contextual information surrounding sample records associated with metagenomics sequence reads available in the European Nucleotide Archive. We outline the annotation process and summarize findings of this effort aimed at increasing usability of publicly available environmental data. Furthermore, we emphasize the benefits of such an exercise and detail its costs. We conclude that such a third party annotation approach is expensive and has value as an element of curation, but should form only part of a more sustainable submitter-driven approach. © The Author(s) 2016. Published by Oxford University Press. |
Buttigieg, P L; Pafilis, E; Lewis, S E; Schildhauer, M P; Walls, R L; Mungall, C J The environment ontology in 2016: Bridging domains with increased scope, semantic density, and interoperation Journal Article Journal of Biomedical Semantics, 7 (1), 2016, ISSN: 2041-1480, (Publisher: BioMed Central Ltd.).
@article{buttigieg_environment_2016,
  title     = {The environment ontology in 2016: Bridging domains with increased scope, semantic density, and interoperation},
  author    = {Buttigieg, P L and Pafilis, E and Lewis, S E and Schildhauer, M P and Walls, R L and Mungall, C J},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84988625177&doi=10.1186%2fs13326-016-0097-6&partnerID=40&md5=c65c1deb2752ddedfe277e0968752111},
  doi       = {10.1186/s13326-016-0097-6},
  issn      = {2041-1480},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {Journal of Biomedical Semantics},
  volume    = {7},
  number    = {1},
  abstract  = {Background: The Environment Ontology (ENVO; http://www.environmentontology.org/), first described in 2013, is a resource and research target for the semantically controlled description of environmental entities. The ontology's initial aim was the representation of the biomes, environmental features, and environmental materials pertinent to genomic and microbiome-related investigations. However, the need for environmental semantics is common to a multitude of fields, and ENVO's use has steadily grown since its initial description. We have thus expanded, enhanced, and generalised the ontology to support its increasingly diverse applications. Methods: We have updated our development suite to promote expressivity, consistency, and speed: we now develop ENVO in the Web Ontology Language (OWL) and employ templating methods to accelerate class creation. We have also taken steps to better align ENVO with the Open Biological and Biomedical Ontologies (OBO) Foundry principles and interoperate with existing OBO ontologies. Further, we applied text-mining approaches to extract habitat information from the Encyclopedia of Life and automatically create experimental habitat classes within ENVO. Results: Relative to its state in 2013, ENVO's content, scope, and implementation have been enhanced and much of its existing content revised for improved semantic representation. ENVO now offers representations of habitats, environmental processes, anthropogenic environments, and entities relevant to environmental health initiatives and the global Sustainable Development Agenda for 2030. Several branches of ENVO have been used to incubate and seed new ontologies in previously unrepresented domains such as food and agronomy. The current release version of the ontology, in OWL format, is available at http://purl.obolibrary.org/obo/envo.owl. Conclusions: ENVO has been shaped into an ontology which bridges multiple domains including biomedicine, natural and anthropogenic ecology, 'omics, and socioeconomic development. Through continued interactions with our users and partners, particularly those performing data archiving and synthesis, we anticipate that ENVO's growth will accelerate in 2017. As always, we invite further contributions and collaboration to advance the semantic representation of the environment, ranging from geographic features and environmental materials, across habitats and ecosystems, to everyday objects in household settings. © 2016 The Author(s).},
  note      = {Publisher: BioMed Central Ltd.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Background: The Environment Ontology (ENVO; http://www.environmentontology.org/), first described in 2013, is a resource and research target for the semantically controlled description of environmental entities. The ontology's initial aim was the representation of the biomes, environmental features, and environmental materials pertinent to genomic and microbiome-related investigations. However, the need for environmental semantics is common to a multitude of fields, and ENVO's use has steadily grown since its initial description. We have thus expanded, enhanced, and generalised the ontology to support its increasingly diverse applications. Methods: We have updated our development suite to promote expressivity, consistency, and speed: we now develop ENVO in the Web Ontology Language (OWL) and employ templating methods to accelerate class creation. We have also taken steps to better align ENVO with the Open Biological and Biomedical Ontologies (OBO) Foundry principles and interoperate with existing OBO ontologies. Further, we applied text-mining approaches to extract habitat information from the Encyclopedia of Life and automatically create experimental habitat classes within ENVO. Results: Relative to its state in 2013, ENVO's content, scope, and implementation have been enhanced and much of its existing content revised for improved semantic representation. ENVO now offers representations of habitats, environmental processes, anthropogenic environments, and entities relevant to environmental health initiatives and the global Sustainable Development Agenda for 2030. Several branches of ENVO have been used to incubate and seed new ontologies in previously unrepresented domains such as food and agronomy. The current release version of the ontology, in OWL format, is available at http://purl.obolibrary.org/obo/envo.owl. Conclusions: ENVO has been shaped into an ontology which bridges multiple domains including biomedicine, natural and anthropogenic ecology, 'omics, and socioeconomic development. Through continued interactions with our users and partners, particularly those performing data archiving and synthesis, we anticipate that ENVO's growth will accelerate in 2017. As always, we invite further contributions and collaboration to advance the semantic representation of the environment, ranging from geographic features and environmental materials, across habitats and ecosystems, to everyday objects in household settings. © 2016 The Author(s). |
2015 |
Pafilis, E; Frankild, S P; Schnetzer, J; Fanini, L; Faulwetter, S; Pavloudi, C; Vasileiadou, K; Leary, P; Hammock, J; Schulz, K; Parr, C S; Arvanitidis, C; Jensen, L J ENVIRONMENTS and EOL: Identification of Environment Ontology terms in text and the annotation of the Encyclopedia of Life Journal Article Bioinformatics, 31 (11), pp. 1872–1874, 2015, ISSN: 1367-4803, (Publisher: Oxford University Press).
@article{pafilis_environments_2015,
  title     = {{ENVIRONMENTS} and {EOL}: Identification of {Environment Ontology} terms in text and the annotation of the {Encyclopedia of Life}},
  author    = {Pafilis, E and Frankild, S P and Schnetzer, J and Fanini, L and Faulwetter, S and Pavloudi, C and Vasileiadou, K and Leary, P and Hammock, J and Schulz, K and Parr, C S and Arvanitidis, C and Jensen, L J},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84941662911&doi=10.1093%2fbioinformatics%2fbtv045&partnerID=40&md5=521c1035d81d477752f37d09c638a7c0},
  doi       = {10.1093/bioinformatics/btv045},
  issn      = {1367-4803},
  year      = {2015},
  date      = {2015-01-01},
  journal   = {Bioinformatics},
  volume    = {31},
  number    = {11},
  pages     = {1872--1874},
  abstract  = {Summary: The association of organisms to their environments is a key issue in exploring biodiversity patterns. This knowledge has traditionally been scattered, but textual descriptions of taxa and their habitats are now being consolidated in centralized resources. However, structured annotations are needed to facilitate large-scale analyses. Therefore, we developed ENVIRONMENTS, a fast dictionary-based tagger capable of identifying Environment Ontology (ENVO) terms in text. We evaluate the accuracy of the tagger on a new manually curated corpus of 600 Encyclopedia of Life (EOL) species pages. We use the tagger to associate taxa with environments by tagging EOL text content monthly, and integrate the results into the EOL to disseminate them to a broad audience of users. Availability and implementation: The software and the corpus are available under the open-source BSD and the CC-BY-NC-SA 3.0 licenses, respectively, at http://environments.hcmr.gr. © The Author 2015. Published by Oxford University Press. All rights reserved.},
  note      = {Publisher: Oxford University Press},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Summary: The association of organisms to their environments is a key issue in exploring biodiversity patterns. This knowledge has traditionally been scattered, but textual descriptions of taxa and their habitats are now being consolidated in centralized resources. However, structured annotations are needed to facilitate large-scale analyses. Therefore, we developed ENVIRONMENTS, a fast dictionary-based tagger capable of identifying Environment Ontology (ENVO) terms in text. We evaluate the accuracy of the tagger on a new manually curated corpus of 600 Encyclopedia of Life (EOL) species pages. We use the tagger to associate taxa with environments by tagging EOL text content monthly, and integrate the results into the EOL to disseminate them to a broad audience of users. Availability and implementation: The software and the corpus are available under the open-source BSD and the CC-BY-NC-SA 3.0 licenses, respectively, at http://environments.hcmr.gr. © The Author 2015. Published by Oxford University Press. All rights reserved. |
2014 |
Davies, N; Field, D; Amaral-Zettler, L; Clark, M S; Deck, J; Drummond, A; Faith, D P; Geller, J; Gilbert, J; Glöckner, F O; Hirsch, P R; Leong, J-A; Meyer, C; Obst, M; Planes, S; Scholin, C; Vogler, A P; Gates, R D; Toonen, R; Berteaux-Lecellier, V; Barbier, M; Barker, K; Bertilsson, S; Bicak, M; Bietz, M J; Bobe, J; Bodrossy, L; Borja, A; Coddington, J; Fuhrman, J; Gerdts, G; Gillespie, R; Goodwin, K; Hanson, P C; Hero, J-M; Hoekman, D; Jansson, J; Jeanthon, C; Kao, R; Klindworth, A; Knight, R; Kottmann, R; Koo, M S; Kotoulas, G; Lowe, A J; Marteinsson, V T; Meyer, F; Morrison, N; Myrold, D D; Pafilis, E; Parker, S; Parnell, J J; Polymenakou, P N; Ratnasingham, S; Roderick, G K; Rodriguez-Ezpeleta, N; Schonrogge, K; Simon, N; Valette-Silver, N J; Springer, Y P; Stone, G N; Stones-Havas, S; Sansone, S-A; Thibault, K M; Wecker, P; Wichels, A; Wooley, J C; Yahara, T; Zingone, A The founding charter of the Genomic Observatories Network Journal Article GigaScience, 3 (1), 2014, ISSN: 2047-217X, (Publisher: BioMed Central Ltd.).
@article{davies_founding_2014,
  title     = {The founding charter of the {Genomic Observatories Network}},
  author    = {Davies, N and Field, D and Amaral-Zettler, L and Clark, M S and Deck, J and Drummond, A and Faith, D P and Geller, J and Gilbert, J and Glöckner, F O and Hirsch, P R and Leong, J-A and Meyer, C and Obst, M and Planes, S and Scholin, C and Vogler, A P and Gates, R D and Toonen, R and Berteaux-Lecellier, V and Barbier, M and Barker, K and Bertilsson, S and Bicak, M and Bietz, M J and Bobe, J and Bodrossy, L and Borja, A and Coddington, J and Fuhrman, J and Gerdts, G and Gillespie, R and Goodwin, K and Hanson, P C and Hero, J-M and Hoekman, D and Jansson, J and Jeanthon, C and Kao, R and Klindworth, A and Knight, R and Kottmann, R and Koo, M S and Kotoulas, G and Lowe, A J and Marteinsson, V T and Meyer, F and Morrison, N and Myrold, D D and Pafilis, E and Parker, S and Parnell, J J and Polymenakou, P N and Ratnasingham, S and Roderick, G K and Rodriguez-Ezpeleta, N and Schonrogge, K and Simon, N and Valette-Silver, N J and Springer, Y P and Stone, G N and Stones-Havas, S and Sansone, S-A and Thibault, K M and Wecker, P and Wichels, A and Wooley, J C and Yahara, T and Zingone, A},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84920397295&doi=10.1186%2f2047-217X-3-2&partnerID=40&md5=a043d700cdaaf754ee7c91c668fc964c},
  doi       = {10.1186/2047-217X-3-2},
  issn      = {2047-217X},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {GigaScience},
  volume    = {3},
  number    = {1},
  abstract  = {The co-authors of this paper hereby state their intention to work together to launch the Genomic Observatories Network (GOs Network) for which this document will serve as its Founding Charter. We define a Genomic Observatory as an ecosystem and/or site subject to long-term scientific research, including (but not limited to) the sustained study of genomic biodiversity from single-celled microbes to multicellular organisms. An international group of 64 scientists first published the call for a global network of Genomic Observatories in January 2012. The vision for such a network was expanded in a subsequent paper and developed over a series of meetings in Bremen (Germany), Shenzhen (China), Moorea (French Polynesia), Oxford (UK), Pacific Grove (California, USA), Washington (DC, USA), and London (UK). While this community-building process continues, here we express our mutual intent to establish the GOs Network formally, and to describe our shared vision for its future. The views expressed here are ours alone as individual scientists, and do not necessarily represent those of the institutions with which we are affiliated. © 2014 Davies et al.; licensee BioMed Central Ltd.},
  note      = {Publisher: BioMed Central Ltd.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
The co-authors of this paper hereby state their intention to work together to launch the Genomic Observatories Network (GOs Network) for which this document will serve as its Founding Charter. We define a Genomic Observatory as an ecosystem and/or site subject to long-term scientific research, including (but not limited to) the sustained study of genomic biodiversity from single-celled microbes to multicellular organisms. An international group of 64 scientists first published the call for a global network of Genomic Observatories in January 2012. The vision for such a network was expanded in a subsequent paper and developed over a series of meetings in Bremen (Germany), Shenzhen (China), Moorea (French Polynesia), Oxford (UK), Pacific Grove (California, USA), Washington (DC, USA), and London (UK). While this community-building process continues, here we express our mutual intent to establish the GOs Network formally, and to describe our shared vision for its future. The views expressed here are ours alone as individual scientists, and do not necessarily represent those of the institutions with which we are affiliated. © 2014 Davies et al.; licensee BioMed Central Ltd. |
Pafilis, Evangelos; Brown, Nigel P; Horn, Heiko Automated Annotation of Scientific Documents: Increasing Access to Biological Knowledge Incollection Elloumi, Mourad (Ed.): Biological Knowledge Discovery Handbook: Preprocessing, Mining and Postprocessing of Biological Data, pp. 869–900, Wiley, 2014.
@incollection{pafilis_automated_2014,
  title     = {Automated Annotation of Scientific Documents: Increasing Access to Biological Knowledge},
  author    = {Pafilis, Evangelos and Brown, Nigel P and Horn, Heiko},
  editor    = {Elloumi, Mourad},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Biological Knowledge Discovery Handbook: Preprocessing, Mining and Postprocessing of Biological Data},
  pages     = {869--900},
  publisher = {Wiley},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}
|
Papanikolaou, N; Pavlopoulos, G A; Pafilis, E; Theodosiou, T; Schneider, R; Satagopam, V P; Ouzounis, C A; Eliopoulos, A G; Promponas, V J; Iliopoulos, I BioTextQuest+: A knowledge integration platform for literature mining and concept discovery Journal Article Bioinformatics, 30 (22), pp. 3249–3256, 2014, ISSN: 1367-4803, (Publisher: Oxford University Press).
@article{papanikolaou_biotextquest_2014,
  title     = {{BioTextQuest+}: A knowledge integration platform for literature mining and concept discovery},
  author    = {Papanikolaou, N and Pavlopoulos, G A and Pafilis, E and Theodosiou, T and Schneider, R and Satagopam, V P and Ouzounis, C A and Eliopoulos, A G and Promponas, V J and Iliopoulos, I},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84911396363&doi=10.1093%2fbioinformatics%2fbtu524&partnerID=40&md5=4bcb398a45d0643b0cd69922ee0f4917},
  doi       = {10.1093/bioinformatics/btu524},
  issn      = {1367-4803},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {Bioinformatics},
  volume    = {30},
  number    = {22},
  pages     = {3249--3256},
  abstract  = {The iterative process of finding relevant information in biomedical literature and performing bioinformatics analyses might result in an endless loop for an inexperienced user, considering the exponential growth of scientific corpora and the plethora of tools designed to mine PubMed® and related biological databases. Herein, we describe BioTextQuest+, a web-based interactive knowledge exploration platform with significant advances to its predecessor (BioTextQuest), aiming to bridge processes such as bioentity recognition, functional annotation, document clustering and data integration towards literature mining and concept discovery. BioTextQuest+ enables PubMed and OMIM querying, retrieval of abstracts related to a targeted request and optimal detection of genes, proteins, molecular functions, pathways and biological processes within the retrieved documents. The front-end interface facilitates the browsing of document clustering per subject, the analysis of term co-occurrence, the generation of tag clouds containing highly represented terms per cluster and at-a-glance popup windows with information about relevant genes and proteins. Moreover, to support experimental research, BioTextQuest+ addresses integration of its primary functionality with biological repositories and software tools able to deliver further bioinformatics services. The Google-like interface extends beyond simple use by offering a range of advanced parameterization for expert users. We demonstrate the functionality of BioTextQuest+ through several exemplary research scenarios including author disambiguation, functional term enrichment, knowledge acquisition and concept discovery linking major human diseases, such as obesity and ageing. © The Author 2014. Published by Oxford University Press.},
  note      = {Publisher: Oxford University Press},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
The iterative process of finding relevant information in biomedical literature and performing bioinformatics analyses might result in an endless loop for an inexperienced user, considering the exponential growth of scientific corpora and the plethora of tools designed to mine PubMed® and related biological databases. Herein, we describe BioTextQuest+, a web-based interactive knowledge exploration platform with significant advances to its predecessor (BioTextQuest), aiming to bridge processes such as bioentity recognition, functional annotation, document clustering and data integration towards literature mining and concept discovery. BioTextQuest+ enables PubMed and OMIM querying, retrieval of abstracts related to a targeted request and optimal detection of genes, proteins, molecular functions, pathways and biological processes within the retrieved documents. The front-end interface facilitates the browsing of document clustering per subject, the analysis of term co-occurrence, the generation of tag clouds containing highly represented terms per cluster and at-a-glance popup windows with information about relevant genes and proteins. Moreover, to support experimental research, BioTextQuest+ addresses integration of its primary functionality with biological repositories and software tools able to deliver further bioinformatics services. The Google-like interface extends beyond simple use by offering a range of advanced parameterization for expert users. We demonstrate the functionality of BioTextQuest+ through several exemplary research scenarios including author disambiguation, functional term enrichment, knowledge acquisition and concept discovery linking major human diseases, such as obesity and ageing. © The Author 2014. Published by Oxford University Press. |
Faulwetter, S; Markantonatou, V; Pavloudi, C; Papageorgiou, N; Keklikoglou, K; Chatzinikolaou, E; Pafilis, E; Chatzigeorgiou, G; Vasileiadou, K; Dailianis, T; Fanini, L; Koulouri, P; Arvanitidis, C Polytraits: A database on biological traits of marine polychaetes Journal Article Biodiversity Data Journal, 2 (1), 2014, ISSN: 1314-2828, (Publisher: Pensoft Publishers).
@article{faulwetter_polytraits_2014,
  title     = {Polytraits: A database on biological traits of marine polychaetes},
  author    = {Faulwetter, S and Markantonatou, V and Pavloudi, C and Papageorgiou, N and Keklikoglou, K and Chatzinikolaou, E and Pafilis, E and Chatzigeorgiou, G and Vasileiadou, K and Dailianis, T and Fanini, L and Koulouri, P and Arvanitidis, C},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85018193846&doi=10.3897%2fBDJ.2.e1024&partnerID=40&md5=621b076567d09921bf9b5146a7f8844e},
  doi       = {10.3897/BDJ.2.e1024},
  issn      = {1314-2828},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {Biodiversity Data Journal},
  volume    = {2},
  number    = {1},
  abstract  = {The study of ecosystem functioning - the role which organisms play in an ecosystem - is becoming increasingly important in marine ecological research. The functional structure of a community can be represented by a set of functional traits assigned to behavioural, reproductive and morphological characteristics. The collection of these traits from the literature is however a laborious and time-consuming process, and gaps of knowledge and restricted availability of literature are a common problem. Trait data are not yet readily being shared by research communities, and even if they are, a lack of trait data repositories and standards for data formats leads to the publication of trait information in forms which cannot be processed by computers. This paper describes Polytraits (http://polytraits.lifewatchgreece.eu), a database on biological traits of marine polychaetes (bristle worms, Polychaeta: Annelida). At present, the database contains almost 20,000 records on morphological, behavioural and reproductive characteristics of more than 1,000 marine polychaete species, all referenced by literature sources. All data can be freely accessed through the project website in different ways and formats, both human-readable and machine-readable, and have been submitted to the Encyclopedia of Life for archival and integration with trait information from other sources. © Faulwetter S et al.},
  note      = {Publisher: Pensoft Publishers},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
The study of ecosystem functioning - the role which organisms play in an ecosystem - is becoming increasingly important in marine ecological research. The functional structure of a community can be represented by a set of functional traits assigned to behavioural, reproductive and morphological characteristics. The collection of these traits from the literature is however a laborious and time-consuming process, and gaps of knowledge and restricted availability of literature are a common problem. Trait data are not yet readily being shared by research communities, and even if they are, a lack of trait data repositories and standards for data formats leads to the publication of trait information in forms which cannot be processed by computers. This paper describes Polytraits (http://polytraits.lifewatchgreece.eu), a database on biological traits of marine polychaetes (bristle worms, Polychaeta: Annelida). At present, the database contains almost 20,000 records on morphological, behavioural and reproductive characteristics of more than 1,000 marine polychaete species, all referenced by literature sources. All data can be freely accessed through the project website in different ways and formats, both human-readable and machine-readable, and have been submitted to the Encyclopedia of Life for archival and integration with trait information from other sources. © Faulwetter S et al. |
Fanini, L; Zampicinini, G; Pafilis, E Beach parties: a case study on recreational human use of the beach and its effects on mobile arthropod fauna Journal Article Ethology Ecology & Evolution, 26 (1), pp. 69–79, 2014, ISSN: 0394-9370, 1828-7131.
@article{fanini_beach_2014,
  title     = {Beach parties: a case study on recreational human use of the beach and its effects on mobile arthropod fauna},
  author    = {Fanini, L and Zampicinini, G and Pafilis, E},
  url       = {http://www.tandfonline.com/doi/abs/10.1080/03949370.2013.821674},
  doi       = {10.1080/03949370.2013.821674},
  issn      = {0394-9370, 1828-7131},
  year      = {2014},
  date      = {2014-01-01},
  urldate   = {2020-08-10},
  journal   = {Ethology Ecology \& Evolution},
  volume    = {26},
  number    = {1},
  pages     = {69--79},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
2013 |
Pafilis, E; Frankild, S; Ijaz, Umer; Fanini, Lucia; Faulwetter, Sarah; Pavloudi, C; Schnetzer, J; Vasileiadou, A; Arvanitidis, C; Quince, C; Jensen, L J Identification of Environment Ontology terms in Text and Annotation of Biodiversity (ENVIRONMENTS-EOL) and Genomics (SEQenv) Information Inproceedings Pafilis, E (Ed.): Biodiversity Information Standards TDWG Conference, Florence, Italy, 2013.
@inproceedings{pafilis_identification_2013,
  author    = {E Pafilis and S Frankild and Umer Ijaz and Lucia Fanini and Sarah Faulwetter
               and C Pavloudi and J Schnetzer and A Vasileiadou and C Arvanitidis
               and C Quince and L J Jensen},
  title     = {Identification of {Environment Ontology} terms in Text and Annotation of Biodiversity ({ENVIRONMENTS-EOL}) and Genomics ({SEQenv}) Information},
  editor    = {E Pafilis},
  booktitle = {Biodiversity Information Standards TDWG Conference, Florence, Italy},
  year      = {2013},
  date      = {2013-10-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Pafilis, E; Frankild, S P; Fanini, L; Faulwetter, S; Pavloudi, C; Vasileiadou, A; Arvanitidis, C; Jensen, L J The SPECIES and ORGANISMS Resources for Fast and Accurate Identification of Taxonomic Names in Text Journal Article PLoS ONE, 8 (6), 2013, ISSN: 1932-6203.
@article{pafilis_species_2013,
  author    = {E Pafilis and S P Frankild and L Fanini and S Faulwetter and C Pavloudi
               and A Vasileiadou and C Arvanitidis and L J Jensen},
  title     = {The {SPECIES} and {ORGANISMS} Resources for Fast and Accurate Identification of Taxonomic Names in Text},
  journal   = {PLoS ONE},
  volume    = {8},
  number    = {6},
  year      = {2013},
  date      = {2013-01-01},
  doi       = {10.1371/journal.pone.0065390},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84879162734&doi=10.1371%2fjournal.pone.0065390&partnerID=40&md5=fee446181731231a273637d3d09c5002},
  issn      = {1932-6203},
  abstract  = {The exponential growth of the biomedical literature is making the need for efficient, accurate text-mining tools increasingly clear. The identification of named biological entities in text is a central and difficult task. We have developed an efficient algorithm and implementation of a dictionary-based approach to named entity recognition, which we here use to identify names of species and other taxa in text. The tool, SPECIES, is more than an order of magnitude faster and as accurate as existing tools. The precision and recall was assessed both on an existing gold-standard corpus and on a new corpus of 800 abstracts, which were manually annotated after the development of the tool. The corpus comprises abstracts from journals selected to represent many taxonomic groups, which gives insights into which types of organism names are hard to detect and which are easy. Finally, we have tagged organism names in the entire Medline database and developed a web resource, ORGANISMS, that makes the results accessible to the broad community of biologists. The SPECIES software is open source and can be downloaded from http://species.jensenlab.org along with dictionary files and the manually annotated gold-standard corpus. The ORGANISMS web resource can be found at http://organisms.jensenlab.org. © 2013 Pafilis et al.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
The exponential growth of the biomedical literature is making the need for efficient, accurate text-mining tools increasingly clear. The identification of named biological entities in text is a central and difficult task. We have developed an efficient algorithm and implementation of a dictionary-based approach to named entity recognition, which we here use to identify names of species and other taxa in text. The tool, SPECIES, is more than an order of magnitude faster and as accurate as existing tools. The precision and recall was assessed both on an existing gold-standard corpus and on a new corpus of 800 abstracts, which were manually annotated after the development of the tool. The corpus comprises abstracts from journals selected to represent many taxonomic groups, which gives insights into which types of organism names are hard to detect and which are easy. Finally, we have tagged organism names in the entire Medline database and developed a web resource, ORGANISMS, that makes the results accessible to the broad community of biologists. The SPECIES software is open source and can be downloaded from http://species.jensenlab.org along with dictionary files and the manually annotated gold-standard corpus. The ORGANISMS web resource can be found at http://organisms.jensenlab.org. © 2013 Pafilis et al. |
Pafilis, E; Pavlopoulos, G A; Satagopam, V P; Papanikolaou, N; Horn, H; Arvanitidis, C; Jensen, L J; Schneider, R; Iliopoulos, I OnTheFly 2.0: A tool for automatic annotation of files and biological information extraction Inproceedings 13th IEEE International Conference on BioInformatics and BioEngineering, IEEE BIBE 2013, 2013, ISBN: 978-1-4799-3163-7.
@inproceedings{pafilis_onthefly_2013,
  author    = {E Pafilis and G A Pavlopoulos and V P Satagopam and N Papanikolaou and H Horn
               and C Arvanitidis and L J Jensen and R Schneider and I Iliopoulos},
  title     = {{OnTheFly} 2.0: A tool for automatic annotation of files and biological information extraction},
  booktitle = {13th IEEE International Conference on BioInformatics and BioEngineering, IEEE BIBE 2013},
  year      = {2013},
  date      = {2013-01-01},
  doi       = {10.1109/BIBE.2013.6701679},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84894224481&doi=10.1109%2fBIBE.2013.6701679&partnerID=40&md5=dd43a25d9b79b4eafa0c9125bc8a1925},
  isbn      = {978-1-4799-3163-7},
  abstract  = {Retrieving all of the necessary information from databases about bioentities mentioned in an article is not a trivial or an easy task. Following the daily literature about a specific biological topic and collecting all the necessary information about the bioentities mentioned in the literature manually is tedious and time consuming. OnTheFly 2.0 is a web application mainly designed for non-computer experts which aims to automate data collection and knowledge extraction from biological literature in a user friendly and efficient way. OnTheFly 2.0 is able to extract bioentities from individual articles such as text, Microsoft Word, Excel and PDF files. With a simple drag-and-drop motion, the text of a document is extensively parsed for bioentities such as protein/gene names and chemical compound names. Utilizing high quality data integration platforms, OnTheFly allows the generation of informative summaries, interaction networks and at-a-glance popup windows containing knowledge related to the bioentities found in documents. OnTheFly 2.0 provides a concise application to automate the extraction of bioentities hidden in various documents and is offered as a web based application. It can be found at: http://onthefly.embl.de, http://onthefly.med.uoc.gr or http://onthefly.hcmr.gr. © 2013 IEEE.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Retrieving all of the necessary information from databases about bioentities mentioned in an article is not a trivial or an easy task. Following the daily literature about a specific biological topic and collecting all the necessary information about the bioentities mentioned in the literature manually is tedious and time consuming. OnTheFly 2.0 is a web application mainly designed for non-computer experts which aims to automate data collection and knowledge extraction from biological literature in a user friendly and efficient way. OnTheFly 2.0 is able to extract bioentities from individual articles such as text, Microsoft Word, Excel and PDF files. With a simple drag-and-drop motion, the text of a document is extensively parsed for bioentities such as protein/gene names and chemical compound names. Utilizing high quality data integration platforms, OnTheFly allows the generation of informative summaries, interaction networks and at-a-glance popup windows containing knowledge related to the bioentities found in documents. OnTheFly 2.0 provides a concise application to automate the extraction of bioentities hidden in various documents and is offered as a web based application. It can be found at: http://onthefly.embl.de, http://onthefly.med.uoc.gr or http://onthefly.hcmr.gr. © 2013 IEEE. |
Hardisty, A; Roberts, D; Addink, W; Aelterman, B; Agosti, D; Amaral-Zettler, L; Ariño, AH; Arvanitidis, C; Backeljau, T; Bailly, N; Belbin, L; Berendsohn, W; Bertrand, N; Caithness, N; Campbell, D; Cochrane, G; Conruyt, N; Culham, A; Damgaard, C; Davies, N; Fady, B; Faulwetter, Sarah; Feest, A; Field, D; Garnier, E; Geser, G; Gilbert, J; Grosche, B; Grosser, D; Herbinet, B; Hobern, D; Jones, A; de Jong, Y; King, D; Knapp, S; Koivula, H; Los, W; Meyer, C; Morris, RA; Morrison, N; Morse, D; Obst, M; Pafilis, E; Page, LM; Page, R; Pape, T; Parr, C; Paton, A; Patterson, D; Paymal, E; Penev, L; Pollet, M; Pyle, R; von Raab-Straube, E; Robert, V; Robertson, T; Rovellotti, O; Saarenmaa, H; Schalk, P; Schaminee, J; Schofield, P; Sier, A; Sierra, S; Smith, V; van Spronsen, E; Thornthon-Wood, S; van Tienderen, P; van Tol, J; Ó Tuama, É; Uetz, P; Vaas, L; Vignes Lebbe, R; Vision, T; Vu, D; De Wever, A; White, R; Willis, K; Young, F A decadal view of biodiversity informatics: challenges and priorities Journal Article BMC Ecology, 13 , pp. 16, 2013. 
@article{hardisty_decadal_2013,
  author    = {A Hardisty and D Roberts and W Addink and B Aelterman and D Agosti
               and L Amaral-Zettler and A H Ariño and C Arvanitidis and T Backeljau
               and N Bailly and L Belbin and W Berendsohn and N Bertrand and N Caithness
               and D Campbell and G Cochrane and N Conruyt and A Culham and C Damgaard
               and N Davies and B Fady and Sarah Faulwetter and A Feest and D Field
               and E Garnier and G Geser and J Gilbert and B Grosche and D Grosser
               and B Herbinet and D Hobern and A Jones and Y de Jong and D King
               and S Knapp and H Koivula and W Los and C Meyer and R A Morris
               and N Morrison and D Morse and M Obst and E Pafilis and L M Page
               and R Page and T Pape and C Parr and A Paton and D Patterson
               and E Paymal and L Penev and M Pollet and R Pyle and E von Raab-Straube
               and V Robert and T Robertson and O Rovellotti and H Saarenmaa and P Schalk
               and J Schaminee and P Schofield and A Sier and S Sierra and V Smith
               and E van Spronsen and S Thornthon-Wood and P van Tienderen and J van Tol
               and Ó Tuama, É and P Uetz and L Vaas and Vignes Lebbe, R and T Vision
               and D Vu and De Wever, A and R White and K Willis and F Young},
  title     = {A decadal view of biodiversity informatics: challenges and priorities},
  journal   = {BMC Ecology},
  volume    = {13},
  pages     = {16},
  year      = {2013},
  date      = {2013-01-01},
  doi       = {10.1186/1472-6785-13-16},
  url       = {http://www.biomedcentral.com/1472-6785/13/16},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
2012 |
Pafilis, E; Frankild, S; Fanini, Lucia; Faulwetter, Sarah; Pavloudi, C; Vasileiadou, A; Arvanitidis, C; Jensen, L J SPECIES: Organism Name Identification in the Scientific Literature Inproceedings Pafilis, E (Ed.): 7th conference of the Hellenic Society for Computational Biology and Bioinformatics, Heraklion, Greece, 2012.
@inproceedings{pafilis_species_2012,
  author    = {E Pafilis and S Frankild and Lucia Fanini and Sarah Faulwetter and C Pavloudi
               and A Vasileiadou and C Arvanitidis and L J Jensen},
  title     = {{SPECIES}: Organism Name Identification in the Scientific Literature},
  editor    = {E Pafilis},
  booktitle = {7th conference of the Hellenic Society for Computational Biology and Bioinformatics, Heraklion, Greece},
  year      = {2012},
  date      = {2012-10-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2011 |
Papanikolaou, Nikolas; Pafilis, Evangelos; Nikolaou, Stavros; Ouzounis, Christos A; Iliopoulos, Ioannis; Promponas, Vasilis J BioTextQuest: a web-based biomedical text mining suite for concept discovery Journal Article Bioinformatics, 27 (23), pp. 3327–3328, 2011, ISSN: 1460-2059, 1367-4803.
@article{papanikolaou_biotextquest_2011,
  author    = {Nikolas Papanikolaou and Evangelos Pafilis and Stavros Nikolaou
               and Christos A Ouzounis and Ioannis Iliopoulos and Vasilis J Promponas},
  title     = {{BioTextQuest}: a web-based biomedical text mining suite for concept discovery},
  journal   = {Bioinformatics},
  volume    = {27},
  number    = {23},
  pages     = {3327--3328},
  year      = {2011},
  date      = {2011-12-01},
  urldate   = {2020-08-17},
  doi       = {10.1093/bioinformatics/btr564},
  url       = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btr564},
  issn      = {1460-2059, 1367-4803},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Arvanitidis, C; Faulwetter, S; Chatzigeorgiou, G; Penev, L; Bánki, O; Dailianis, T; Pafilis, E; Kouratoras, M; Chatzinikolaou, E; Fanini, L; Vasileiadou, A; Pavloudi, C; Vavilis, P; Koulouri, P; Dounas, C Engaging the broader community in biodiversity research: The concept of the COMBER pilot project for divers in ViBRANT Journal Article ZooKeys, 150 , pp. 211–229, 2011, ISSN: 1313-2989.
@article{arvanitidis_engaging_2011,
  author    = {C Arvanitidis and S Faulwetter and G Chatzigeorgiou and L Penev and O Bánki
               and T Dailianis and E Pafilis and M Kouratoras and E Chatzinikolaou
               and L Fanini and A Vasileiadou and C Pavloudi and P Vavilis
               and P Koulouri and C Dounas},
  title     = {Engaging the broader community in biodiversity research: The concept of the {COMBER} pilot project for divers in {ViBRANT}},
  journal   = {ZooKeys},
  volume    = {150},
  pages     = {211--229},
  year      = {2011},
  date      = {2011-01-01},
  doi       = {10.3897/zookeys.150.2149},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84856457337&doi=10.3897%2fzookeys.150.2149&partnerID=40&md5=48d27ff65c2def3588c3e0f3a8b72a92},
  issn      = {1313-2989},
  abstract  = {This paper discusses the design and implementation of a citizen science pilot project, COMBER (Citizens' Network for the Observation of Marine BiodivERsity, http://www.comber.hcmr.gr), which has been initiated under the ViBRANT EU e-infrastructure. It is designed and implemented for divers and snorkelers who are interested in participating in marine biodiversity citizen science projects. It shows the necessity of engaging the broader community in the marine biodiversity monitoring and research projects, networks and initiatives. It analyses the stakeholders, the industry and the relevant markets involved in diving activities and their potential to sustain these activities. The principles, including data policy and rewards for the participating divers through their own data, upon which this project is based are thoroughly discussed. The results of the users analysis and lessons learned so far are presented. Future plans include promotion, links with citizen science web developments, data publishing tools, and development of new scientific hypotheses to be tested by the data collected so far.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
This paper discusses the design and implementation of a citizen science pilot project, COMBER (Citizens' Network for the Observation of Marine BiodivERsity, http://www.comber.hcmr.gr), which has been initiated under the ViBRANT EU e-infrastructure. It is designed and implemented for divers and snorkelers who are interested in participating in marine biodiversity citizen science projects. It shows the necessity of engaging the broader community in the marine biodiversity monitoring and research projects, networks and initiatives. It analyses the stakeholders, the industry and the relevant markets involved in diving activities and their potential to sustain these activities. The principles, including data policy and rewards for the participating divers through their own data, upon which this project is based are thoroughly discussed. The results of the users analysis and lessons learned so far are presented. Future plans include promotion, links with citizen science web developments, data publishing tools, and development of new scientific hypotheses to be tested by the data collected so far. |
2010 |
O’Donoghue, Seán I; Horn, Heiko; Pafilis, Evangelos; Haag, Sven; Kuhn, Michael; Satagopam, Venkata P; Schneider, Reinhard; Jensen, Lars J Reflect: A practical approach to web semantics Journal Article Web Semantics: Science, Services and Agents on the World Wide Web, 8 (2-3), pp. 182–189, 2010, ISSN: 1570-8268.
@article{odonoghue_reflect_2010,
  author    = {Seán I O'Donoghue and Heiko Horn and Evangelos Pafilis and Sven Haag
               and Michael Kuhn and Venkata P Satagopam and Reinhard Schneider
               and Lars J Jensen},
  title     = {Reflect: A practical approach to web semantics},
  journal   = {Web Semantics: Science, Services and Agents on the World Wide Web},
  volume    = {8},
  number    = {2--3},
  pages     = {182--189},
  year      = {2010},
  date      = {2010-07-01},
  urldate   = {2020-08-18},
  doi       = {10.1016/j.websem.2010.03.003},
  url       = {https://linkinghub.elsevier.com/retrieve/pii/S1570826810000193},
  issn      = {1570-8268},
  abstract  = {To date, adding semantic capabilities to web content usually requires considerable server-side re-engineering, thus only a tiny fraction of all web content currently has semantic annotations. Recently, we announced Reflect (http://reflect.ws), a free service that takes a more practical approach: Reflect uses augmented browsing to allow end-users to add systematic semantic annotations to any web-page in real-time, typically within seconds. In this paper we describe the tagging process in detail and show how further entity types can be added to Reflect; we also describe how publishers and content providers can access Reflect programmatically using SOAP, REST (HTTP post), and JavaScript. Usage of Reflect has grown rapidly within the life sciences, and while currently only genes, protein and small molecule names are tagged, we plan to soon expand the scope to include a much broader range of terms (e.g., Wikipedia entries). The popularity of Reflect demonstrates the use and feasibility of letting end-users decide how and when to add semantic annotations. Ultimately, ‘semantics is in the eye of the end-user’, hence we believe end-user approaches such as Reflect will become increasingly important in semantic web technologies. © 2010 Elsevier B.V.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
To date, adding semantic capabilities to web content usually requires considerable server-side re-engineering, thus only a tiny fraction of all web content currently has semantic annotations. Recently, we announced Reflect (http://reflect.ws), a free service that takes a more practical approach: Reflect uses augmented browsing to allow end-users to add systematic semantic annotations to any web-page in real-time, typically within seconds. In this paper we describe the tagging process in detail and show how further entity types can be added to Reflect; we also describe how publishers and content providers can access Reflect programmatically using SOAP, REST (HTTP post), and JavaScript. Usage of Reflect has grown rapidly within the life sciences, and while currently only genes, protein and small molecule names are tagged, we plan to soon expand the scope to include a much broader range of terms (e.g., Wikipedia entries). The popularity of Reflect demonstrates the use and feasibility of letting end-users decide how and when to add semantic annotations. Ultimately, ‘semantics is in the eye of the end-user’, hence we believe end-user approaches such as Reflect will become increasingly important in semantic web technologies. © 2010 Elsevier B.V. |
Katayama, Toshiaki; Arakawa, Kazuharu; Nakao, Mitsuteru; Ono, Keiichiro; Aoki-Kinoshita, Kiyoko F; Yamamoto, Yasunori; Yamaguchi, Atsuko; Kawashima, Shuichi; Chun, Hong-Woo; Aerts, Jan; Aranda, Bruno; Barboza, Load Hendrix; Bonnal, Raoul JP; Bruskiewich, Richard; Bryne, Jan C; Fernendez, Jose M; Funahashi, Akira; Gordon, Paul MK; Goto, Naohisa; Groscurth, Andreas; Gutteridge, Alex; Holland, Richard; Kano, Yoshinobu; Kawas, Edward A; Kerhornou, Arnaud; Kibukawa, Eri; Kinjo, Akira R; Kuhn, Michael; Lapp, Hilmar; Lehvaslaiho, Heikki; Nakamura, Hiroyuki; Nakamura, Yasukazu; Nishizawa, Tatsuya; Nobata, Chikashi; Noguchi, Tamotsu; Oinn, Thomas M; Okamoto, Shinobu; Owen, Stuart; Pafilis, Evangelos; Pocock, Matthew; Prins, Pjotr; Ranzinger, Rene; Reisinger, Florian; Salwinski, Lukasz; Schreiber, Mark; Senger, Martin; Shigemoto, Yasumasa; Standley, Daron M; Sugawara, Hideaki; Tashiro, Toshiyuki; Trelles, Oswaldo; Vos, Rutger A; Wilkinson, Mark D; York, William; Zmasek, Christian M; Asai, Kiyoshi; Takagi, Toshihisa The DBCLS BioHackathon: standardization and interoperability for bioinformatics web services and workflows Journal Article Journal of Biomedical Semantics, 1 , pp. 8, 2010, ISSN: 2041-1480. 
@article{katayama_dbcls_2010,
  author    = {Toshiaki Katayama and Kazuharu Arakawa and Mitsuteru Nakao and Keiichiro Ono
               and Kiyoko F Aoki-Kinoshita and Yasunori Yamamoto and Atsuko Yamaguchi
               and Shuichi Kawashima and Hong-Woo Chun and Jan Aerts and Bruno Aranda
               and Load Hendrix Barboza and Raoul J P Bonnal and Richard Bruskiewich
               and Jan C Bryne and Jose M Fernendez and Akira Funahashi and Paul M K Gordon
               and Naohisa Goto and Andreas Groscurth and Alex Gutteridge and Richard Holland
               and Yoshinobu Kano and Edward A Kawas and Arnaud Kerhornou and Eri Kibukawa
               and Akira R Kinjo and Michael Kuhn and Hilmar Lapp and Heikki Lehvaslaiho
               and Hiroyuki Nakamura and Yasukazu Nakamura and Tatsuya Nishizawa
               and Chikashi Nobata and Tamotsu Noguchi and Thomas M Oinn and Shinobu Okamoto
               and Stuart Owen and Evangelos Pafilis and Matthew Pocock and Pjotr Prins
               and Rene Ranzinger and Florian Reisinger and Lukasz Salwinski and Mark Schreiber
               and Martin Senger and Yasumasa Shigemoto and Daron M Standley
               and Hideaki Sugawara and Toshiyuki Tashiro and Oswaldo Trelles
               and Rutger A Vos and Mark D Wilkinson and William York
               and Christian M Zmasek and Kiyoshi Asai and Toshihisa Takagi},
  title     = {The {DBCLS} {BioHackathon}: standardization and interoperability for bioinformatics web services and workflows},
  journal   = {Journal of Biomedical Semantics},
  volume    = {1},
  pages     = {8},
  year      = {2010},
  date      = {2010-01-01},
  doi       = {10.1186/2041-1480-1-8},
  issn      = {2041-1480},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
2009 |
Pafilis, Evangelos; O'Donoghue, Seán I; Jensen, Lars J; Horn, Heiko; Kuhn, Michael; Brown, Nigel P; Schneider, Reinhard Reflect: augmented browsing for the life scientist Journal Article Nature Biotechnology, 27 (6), pp. 508–510, 2009, ISSN: 1087-0156, 1546-1696.
@article{pafilis_reflect_2009,
  author    = {Evangelos Pafilis and Seán I O'Donoghue and Lars J Jensen and Heiko Horn and Michael Kuhn and Nigel P Brown and Reinhard Schneider},
  title     = {Reflect: augmented browsing for the life scientist},
  journal   = {Nature Biotechnology},
  volume    = {27},
  number    = {6},
  pages     = {508--510},
  year      = {2009},
  date      = {2009-06-01},
  urldate   = {2020-08-18},
  doi       = {10.1038/nbt0609-508},
  url       = {http://www.nature.com/articles/nbt0609-508},
  issn      = {1087-0156, 1546-1696},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Pavlopoulos, G A; Pafilis, E; Kuhn, M; Hooper, S D; Schneider, R OnTheFly: A tool for automated document-based text annotation, data linking and network generation Journal Article Bioinformatics, 25 , pp. 977–978, 2009.
@article{pavlopoulos_onthefly_2009,
  author    = {G A Pavlopoulos and E Pafilis and M Kuhn and S D Hooper and R Schneider},
  title     = {{OnTheFly}: A tool for automated document-based text annotation, data linking and network generation},
  journal   = {Bioinformatics},
  volume    = {25},
  pages     = {977--978},
  year      = {2009},
  date      = {2009-01-01},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-63549085123&partnerID=40&md5=3cc4fe1671646d4fd2cbfa49d70e9342},
  abstract  = {OnTheFly is a web-based application that applies biological named entity recognition to enrich Microsoft Office, PDF and plain text documents. The input files are converted into the HTML format and then sent to the Reflect tagging server, which highlights biological entity names like genes, proteins and chemicals, and attaches to them JavaScript code to invoke a summary pop-up window. The window provides an overview of relevant information about the entity, such as a protein description, the domain composition, a link to the 3D structure and links to other relevant online resources. OnTheFly is also able to extract the bioentities mentioned in a set of files and to produce a graphical representation of the networks of the known and predicted associations of these entities by retrieving the information from the STITCH database. © 2009 The Author(s).},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
OnTheFly is a web-based application that applies biological named entity recognition to enrich Microsoft Office, PDF and plain text documents. The input files are converted into the HTML format and then sent to the Reflect tagging server, which highlights biological entity names like genes, proteins and chemicals, and attaches to them JavaScript code to invoke a summary pop-up window. The window provides an overview of relevant information about the entity, such as a protein description, the domain composition, a link to the 3D structure and links to other relevant online resources. OnTheFly is also able to extract the bioentities mentioned in a set of files and to produce a graphical representation of the networks of the known and predicted associations of these entities by retrieving the information from the STITCH database. © 2009 The Author(s). |
2008 |
Stockinger, H; Attwood, T; Chohan, S N; Côté, R; Cudré-Mauroux, P; Falquet, L; Fernandes, P; Finn, R D; Hupponen, T; Korpelainen, E; Labarga, A; Laugraud, A; Lima, T; Pafilis, E; Pagni, M; Pettifer, S; Phan, I; Rahman, N Experience using web services for biological sequence analysis Journal Article Briefings in Bioinformatics, 9 , pp. 493–505, 2008.
@article{stockinger_experience_2008,
  author    = {H Stockinger and T Attwood and S N Chohan and R Côté and P Cudré-Mauroux
               and L Falquet and P Fernandes and R D Finn and T Hupponen and E Korpelainen
               and A Labarga and A Laugraud and T Lima and E Pafilis and M Pagni
               and S Pettifer and I Phan and N Rahman},
  title     = {Experience using web services for biological sequence analysis},
  journal   = {Briefings in Bioinformatics},
  volume    = {9},
  pages     = {493--505},
  year      = {2008},
  date      = {2008-01-01},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-58149380090&partnerID=40&md5=edc5102ee3f35fd69ae7f2f083454ac3},
  abstract  = {Programmatic access to data and tools through the web using so-called web services has an important role to play in bioinformatics. In this article, we discuss the most popular approaches based on SOAP/WS-I and REST and describe our, a cross section of the community, experiences with providing and using web services in the context of biological sequence analysis. We briefly review main technological approaches as well as best practice hints that are useful for both users and developers. Finally, syntactic and semantic data integration issues with multiple web services are discussed. © The Author 2008. Published by Oxford University Press.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Programmatic access to data and tools through the web using so-called web services has an important role to play in bioinformatics. In this article, we discuss the most popular approaches based on SOAP/WS-I and REST and describe our, a cross section of the community, experiences with providing and using web services in the context of biological sequence analysis. We briefly review main technological approaches as well as best practice hints that are useful for both users and developers. Finally, syntactic and semantic data integration issues with multiple web services are discussed. © The Author 2008. Published by Oxford University Press. |
Pavlopoulos, G A; O’Donoghue, S I; Satagopam, V P; Soldatos, T G; Pafilis, E; Schneider, R Arena3D: Visualization of biological networks in 3D Journal Article BMC Systems Biology, 2 , 2008.
@article{pavlopoulos_arena3d_2008,
  author    = {G A Pavlopoulos and S I O'Donoghue and V P Satagopam and T G Soldatos
               and E Pafilis and R Schneider},
  title     = {{Arena3D}: Visualization of biological networks in {3D}},
  journal   = {BMC Systems Biology},
  volume    = {2},
  year      = {2008},
  date      = {2008-01-01},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-60749103244&partnerID=40&md5=e94bd6d808623720d920ffe64d0c596d},
  abstract  = {Background: Complexity is a key problem when visualizing biological networks; as the number of entities increases, most graphical views become incomprehensible. Our goal is to enable many thousands of entities to be visualized meaningfully and with high performance. Results: We present a new visualization tool, Arena3D, which introduces a new concept of staggered layers in 3D space. Related data - such as proteins, chemicals, or pathways - can be grouped onto separate layers and arranged via layout algorithms, such as Fruchterman-Reingold, distance geometry, and a novel hierarchical layout. Data on a layer can be clustered via k-means, affinity propagation, Markov clustering, neighbor joining, tree clustering, or UPGMA (‘unweighted pair-group method with arithmetic mean’). A simple input format defines the name and URL for each node, and defines connections or similarity scores between pairs of nodes. The use of Arena3D is illustrated with datasets related to Huntington’s disease. Conclusion: Arena3D is a user friendly visualization tool that is able to visualize biological or any other network in 3D space. It is free for academic use and runs on any platform. It can be downloaded or launched directly from http://arena3d.org. Java3D library and Java 1.5 need to be pre-installed for the software to run. © 2008 Pavlopoulos et al; licensee BioMed Central Ltd.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Background: Complexity is a key problem when visualizing biological networks; as the number of entities increases, most graphical views become incomprehensible. Our goal is to enable many thousands of entities to be visualized meaningfully and with high performance. Results: We present a new visualization tool, Arena3D, which introduces a new concept of staggered layers in 3D space. Related data - such as proteins, chemicals, or pathways - can be grouped onto separate layers and arranged via layout algorithms, such as Fruchterman-Reingold, distance geometry, and a novel hierarchical layout. Data on a layer can be clustered via k-means, affinity propagation, Markov clustering, neighbor joining, tree clustering, or UPGMA (‘unweighted pair-group method with arithmetic mean’). A simple input format defines the name and URL for each node, and defines connections or similarity scores between pairs of nodes. The use of Arena3D is illustrated with datasets related to Huntington’s disease. Conclusion: Arena3D is a user friendly visualization tool that is able to visualize biological or any other network in 3D space. It is free for academic use and runs on any platform. It can be downloaded or launched directly from http://arena3d.org. Java3D library and Java 1.5 need to be pre-installed for the software to run. © 2008 Pavlopoulos et al; licensee BioMed Central Ltd. |
2007 |
Barbosa-Silva, A; Pafilis, E; Ortega, J M; Schneider, R Development of SRS.php, a Simple Object Access Protocol-based library for data acquisition from integrated biological databases Journal Article Genetics and Molecular Research, 6 , pp. 1142–1150, 2007.
@article{barbosa-silva_development_2007,
  author    = {A Barbosa-Silva and E Pafilis and J M Ortega and R Schneider},
  title     = {Development of {SRS.php}, a {Simple Object Access Protocol}-based library for data acquisition from integrated biological databases},
  journal   = {Genetics and Molecular Research},
  volume    = {6},
  pages     = {1142--1150},
  year      = {2007},
  date      = {2007-01-01},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-37349037571&partnerID=40&md5=0c3169ca0592a88d87aff04936c7e13a},
  abstract  = {Data integration has become an important task for biological database providers. The current model for data exchange among different sources simplifies the manner that distinct information is accessed by users. The evolution of data representation from HTML to XML enabled programs, instead of humans, to interact with biological databases. We present here SRS.php, a PHP library that can interact with the data integration Sequence Retrieval System (SRS). The library has been written using SOAP definitions, and permits the programmatic communication through webservices with the SRS. The interactions are possible by invoking the methods described in WSDL by exchanging XML messages. The current functions available in the library have been built to access specific data stored in any of the 90 different databases (such as UNIPROT, KEGG and GO) using the same query syntax format. The inclusion of the described functions in the source of scripts written in PHP enables them as webservice clients to the SRS server. The functions permit one to query the whole content of any SRS database, to list specific records in these databases, to get specific fields from the records, and to link any record among any pair of linked databases. The case study presented exemplifies the library usage to retrieve information regarding registries of a Plant Defense Mechanisms database. The Plant Defense Mechanisms database is currently being developed, and the proposal of SRS.php library usage is to enable the data acquisition for the further warehousing tasks related to its setup and maintenance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Data integration has become an important task for biological database providers. The current model for data exchange among different sources simplifies the manner that distinct information is accessed by users. The evolution of data representation from HTML to XML enabled programs, instead of humans, to interact with biological databases. We present here SRS.php, a PHP library that can interact with the data integration Sequence Retrieval System (SRS). The library has been written using SOAP definitions, and permits the programmatic communication through webservices with the SRS. The interactions are possible by invoking the methods described in WSDL by exchanging XML messages. The current functions available in the library have been built to access specific data stored in any of the 90 different databases (such as UNIPROT, KEGG and GO) using the same query syntax format. The inclusion of the described functions in the source of scripts written in PHP enables them as webservice clients to the SRS server. The functions permit one to query the whole content of any SRS database, to list specific records in these databases, to get specific fields from the records, and to link any record among any pair of linked databases. The case study presented exemplifies the library usage to retrieve information regarding registries of a Plant Defense Mechanisms database. The Plant Defense Mechanisms database is currently being developed, and the proposal of SRS.php library usage is to enable the data acquisition for the further warehousing tasks related to its setup and maintenance. |
Evangelos Pafilis
2024 |
The ELIXIR Biodiversity Community: Understanding short- and long-term changes in biodiversity Journal Article F1000Research, 12 , pp. 499, 2024, ISSN: 2046-1402. |
Visualizing metagenomic and metatranscriptomic data: A comprehensive review Journal Article Computational and Structural Biotechnology Journal, 23 , pp. 2011–2033, 2024, ISSN: 20010370. |
2023 |
metaGOflow: a workflow for the analysis of marine Genomic Observatories shotgun metagenomics data Journal Article GigaScience, 12 , pp. giad078, 2023, ISSN: 2047-217X. |
S1000: a better taxonomic name corpus for biomedical information extraction Journal Article Bioinformatics, 39 (6), pp. btad369, 2023, ISSN: 1367-4811. |
NAR Genomics and Bioinformatics, 5 (2), pp. lqad053, 2023, ISSN: 2631-9268. |
Exploring microbial functional biodiversity at the protein family level—From metagenomic sequence reads to annotated protein clusters Journal Article Frontiers in Bioinformatics, 3 , pp. 1157956, 2023, ISSN: 2673-7647. |
2022 |
PREGO: A Literature and Data-Mining Resource to Associate Microorganisms, Biological Processes, and Environment Types Journal Article Microorganisms, 10 (2), pp. 293, 2022, ISSN: 2076-2607. |
Automating the Curation Process of Historical Literature on Marine Biodiversity Using Text Mining: The DECO Workflow Journal Article Frontiers in Marine Science, 9 , pp. 940844, 2022, ISSN: 2296-7745. |
2021 |
NAR Genomics and Bioinformatics, 3 (4), pp. lqab090, 2021, ISSN: 2631-9268. |
Microbiome Metadata Standards: Report of the National Microbiome Data Collaborative’s Workshop and Follow-On Activities Journal Article 6 , 2021. |
0s and 1s in marine molecular research: a regional HPC perspective Journal Article GigaScience, 10 (8), pp. giab053, 2021, ISSN: 2047-217X. |
2020 |
PEMA: a flexible Pipeline for Environmental DNA Metabarcoding Analysis of the 16S/18S ribosomal RNA, ITS, and COI marker genes Journal Article GigaScience, 9 (3), 2020, ISSN: 2047-217X, (_eprint: https://academic.oup.com/gigascience/article-pdf/9/3/giaa022/32894405/giaa022.pdf). |
2019 |
Establishment of computational biology in Greece and Cyprus: Past, present, and future Journal Article PLOS Computational Biology, 15 (12), pp. e1007532, 2019, ISSN: 1553-7358, (BIODIV). |
2016 |
EMODnet Workshop on mechanisms and guidelines to mobilise historical data into biogeographic databases Journal Article Research Ideas and Outcomes, 2 , pp. e9774, 2016, ISSN: 2367-7163. |
Environmental variability and heavy metal concentrations from five lagoons in the Ionian Sea (Amvrakikos Gulf, W Greece) Journal Article Biodiversity Data Journal, 4 (1), 2016, ISSN: 13142828, (Publisher: Pensoft Publishers). |
EXTRACT: Interactive extraction of environment metadata and term suggestion for metagenomic sample annotation Journal Article Database, 2016 , 2016, ISSN: 17580463, (Publisher: Oxford University Press). |
Optimized R functions for analysis of ecological community data using the R virtual laboratory (RvLab) Journal Article Biodiversity Data Journal, 4 (1), 2016, ISSN: 13142828, (Publisher: Pensoft Publishers). |
Overview of the interactive task in BioCreative V Journal Article Database, 2016 , 2016, ISSN: 17580463, (Publisher: Oxford University Press). |
Seqenv: Linking sequences to environments through text mining Journal Article PeerJ, 2016 (12), 2016, ISSN: 21678359, (Publisher: PeerJ Inc.). |
Value, but high costs in post-deposition data curation Journal Article Database, 2016 , 2016, ISSN: 17580463, (Publisher: Oxford University Press). |
The environment ontology in 2016: Bridging domains with increased scope, semantic density, and interoperation Journal Article Journal of Biomedical Semantics, 7 (1), 2016, ISSN: 20411480, (Publisher: BioMed Central Ltd.). |
2015 |
ENVIRONMENTS and EOL: Identification of Environment Ontology terms in text and the annotation of the Encyclopedia of Life Journal Article Bioinformatics, 31 (11), pp. 1872–1874, 2015, ISSN: 13674803, (Publisher: Oxford University Press). |
2014 |
The founding charter of the Genomic Observatories Network Journal Article GigaScience, 3 (1), 2014, ISSN: 2047217X, (Publisher: BioMed Central Ltd.). |
Automated Annotation of Scientific Documents: Increasing Access to Biological Knowledge Incollection Elloumi, Mourad (Ed.): Biological Knowledge Discovery Handbook: Preprocessing, Mining and Postprocessing of Biological Data, pp. 869–900, Wiley, 2014, (Backup Publisher: Wiley Section: Automated Annotation of Scientific Documents: Increasing Access to Biological Knowledge). |
BioTextQuest+: A knowledge integration platform for literature mining and concept discovery Journal Article Bioinformatics, 30 (22), pp. 3249–3256, 2014, ISSN: 13674803, (Publisher: Oxford University Press). |
Polytraits: A database on biological traits of marine polychaetes Journal Article Biodiversity Data Journal, 2 (1), 2014, ISSN: 13142828, (Publisher: Pensoft Publishers). |
Beach parties: a case study on recreational human use of the beach and its effects on mobile arthropod fauna Journal Article Ethology Ecology & Evolution, 26 (1), pp. 69–79, 2014, ISSN: 0394-9370, 1828-7131. |
2013 |
Identification of Environment Ontology terms in Text and Annotation of Biodiversity (ENVIRONMENTS-EOL) and Genomics (SEQenv) Information Inproceedings Pafilis, E (Ed.): Biodiversity Information Standards TDWG Conference, Florence, Italy, 2013. |
The SPECIES and ORGANISMS Resources for Fast and Accurate Identification of Taxonomic Names in Text Journal Article PLoS ONE, 8 (6), 2013, ISSN: 19326203. |
OnTheFly 2.0: A tool for automatic annotation of files and biological information extraction Inproceedings 13th IEEE International Conference on BioInformatics and BioEngineering, IEEE BIBE 2013, 2013, ISBN: 978-1-4799-3163-7. |
A decadal view of biodiversity informatics: challenges and priorities Journal Article BMC Ecology, 13 , pp. 16, 2013. |
2012 |
SPECIES: Organism Name Identification in the Scientific Literature Inproceedings Pafilis, E (Ed.): 7th conference of the Hellenic Society for Computational Biology and Bioinformatics, Heraklion, Greece, 2012. |
2011 |
BioTextQuest: a web-based biomedical text mining suite for concept discovery Journal Article Bioinformatics, 27 (23), pp. 3327–3328, 2011, ISSN: 1460-2059, 1367-4803. |
Engaging the broader community in biodiversity research: The concept of the COMBER pilot project for divers in ViBRANT Journal Article ZooKeys, 150 , pp. 211–229, 2011, ISSN: 13132989. |
2010 |
Reflect: A practical approach to web semantics Journal Article Web Semantics: Science, Services and Agents on the World Wide Web, 8 (2-3), pp. 182–189, 2010, ISSN: 15708268. |
The DBCLS BioHackathon: standardization and interoperability for bioinformatics web services and workflows Journal Article Journal of Biomedical Semantics, 1 , pp. 8, 2010, ISSN: 2041-1480. |
2009 |
Reflect: augmented browsing for the life scientist Journal Article Nature Biotechnology, 27 (6), pp. 508–510, 2009, ISSN: 1087-0156, 1546-1696. |
OnTheFly: A tool for automated document-based text annotation, data linking and network generation Journal Article Bioinformatics, 25 , pp. 977–978, 2009. |
2008 |
Experience using web services for biological sequence analysis Journal Article Briefings in Bioinformatics, 9 , pp. 493–505, 2008. |
Arena3D: Visualization of biological networks in 3D Journal Article BMC Systems Biology, 2 , 2008. |
2007 |
Development of SRS.php, a Simple Object Access Protocol-based library for data acquisition from integrated biological databases Journal Article Genetics and Molecular Research, 6 , pp. 1142–1150, 2007. |