Source code for aiida_fleur.parsers.fleur

###############################################################################
# Copyright (c), Forschungszentrum Jülich GmbH, IAS-1/PGI-1, Germany.         #
#                All rights reserved.                                         #
# This file is part of the AiiDA-FLEUR package.                               #
#                                                                             #
# The code is hosted on GitHub at https://github.com/JuDFTteam/aiida-fleur    #
# For further information on the license, see the LICENSE.txt file            #
# For further information please visit http://www.flapw.de or                 #
# http://aiida-fleur.readthedocs.io/en/develop/                               #
###############################################################################
"""
This module contains the parser for a FLEUR calculation and methods for parsing
different files produced by FLEUR.

Please implement file parsing routines that they can be executed from outside
the parser. Makes testing and portability easier.
"""
# TODO: warnings
import re
import json
from lxml import etree

from aiida.parsers import Parser
from aiida.orm import Dict
from aiida.common.exceptions import NotExistent

from masci_tools.io.parsers.fleur import outxml_parser
from masci_tools.io.parsers.fleur_schema import InputSchemaDict

#Phrases in this list are used to detect out of
#memory errors
OUT_OF_MEMORY_PHRASES = [
    'cgroup out-of-memory handler',
    'Out Of Memory',
    'Allocation of array for communication failed'  #from io/eig66_mpi
]


[docs]class FleurParser(Parser): """ This class is the implementation of the Parser class for FLEUR. It parses the FLEUR output if the calculation was successful, i.e checks if all files are there that should be and their condition. Then it parses the out.xml file and returns a (simple) parameterData node with the results of the last iteration. Other files (DOS.x, bands.x, relax.xml, ...) are also parsed if they are retrieved. """ _setting_key = 'parser_options'
[docs] def get_linkname_outparams_complex(self): """ Returns the name of the link to the output_complex Node contains the Fleur output in a rather complex dictionary. """ return 'output_complex'
[docs] def get_linkname_outparams(self): """ Returns the name of the link to the output_complex Node contains the Fleur output in a rather complex dictionary. """ return 'output_parameters'
[docs] def parse(self, **kwargs): """ Receives in input a dictionary of retrieved nodes. Does all the logic here. Checks presents of files. Calls routines to parse them and returns parameter nodes and success. :return successful: Bool, if overall parsing was successful or not :return new_nodes_list: list of tuples of two (linkname, Dataobject), nodes to be stored by AiiDA """ ####### init some variables ###### # these files should be at least present after success of a Fleur run calc = self.node FleurCalculation = calc.process_class # this files should be retrieved should_retrieve = calc.get_attribute('retrieve_list') has_xml_outfile = False has_relax_file = False ######### Check presence of files ###### # select the folder object # Check that the retrieved folder is there try: output_folder = self.retrieved except NotExistent: self.logger.error('No retrieved folder found') return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER # check what is inside the folder list_of_files = output_folder.list_object_names() self.logger.info(f'File list: {list_of_files}') # has output xml file, otherwise error if FleurCalculation._OUTXML_FILE_NAME not in list_of_files: self.logger.error(f"XML out not found '{FleurCalculation._OUTXML_FILE_NAME}'") has_xml_outfile = False #Return after the error lines were processed else: has_xml_outfile = True # check if all files expected are there for the calculation for file in should_retrieve: if file not in list_of_files: self.logger.warning( f"Expected file '{file}' not found in retrieved folder, it was probably not created by fleur") # check if something was written to the error file if FleurCalculation._ERROR_FILE_NAME in list_of_files: errorfile = FleurCalculation._ERROR_FILE_NAME # read try: with output_folder.open(errorfile, 'r') as efile: error_file_lines = efile.read() # Note: read(), not readlines() except OSError: self.logger.error(f'Failed to open error file: {errorfile}.') return self.exit_codes.ERROR_OPENING_OUTPUTS if error_file_lines: if isinstance(error_file_lines, bytes): error_file_lines = error_file_lines.replace(b'\x00', b' ') else: error_file_lines = error_file_lines.replace('\x00', ' ') if 'Run finished successfully' not in error_file_lines: self.logger.warning('The following was written into std error and piped to {}' ' : \n {}'.format(errorfile, error_file_lines)) self.logger.error('FLEUR calculation did not finish successfully.') # here we estimate how much memory was available and consumed mpiprocs = self.node.get_attribute('resources').get('num_mpiprocs_per_machine', 1) kb_used = 0.0 if has_xml_outfile: with output_folder.open(FleurCalculation._OUTXML_FILE_NAME, 'r') as out_file: # lazy out.xml parsing outlines = out_file.read() try: line_avail = re.findall(r'<mem memoryPerNode="\d+', outlines)[0] mem_kb_avail = int(re.findall(r'\d+', line_avail)[0]) except IndexError: mem_kb_avail = 1.0 self.logger.info('Did not manage to find memory available info.') else: usage_json = FleurCalculation._USAGE_FILE_NAME if usage_json in list_of_files: with output_folder.open(usage_json, 'r') as us_file: usage = json.load(us_file) kb_used = usage['data']['VmPeak'] else: try: line_used = re.findall(r'used.+', error_file_lines)[0] kb_used = int(re.findall(r'\d+', line_used)[2]) except IndexError: self.logger.info('Did not manage to find memory usage info.') else: kb_used = 0.0 mem_kb_avail = 1.0 self.logger.info('Did not manage to find memory available info.') self.logger.info('Did not manage to find memory usage info.') # here we estimate how much walltime was available and consumed try: time_avail_sec = self.node.attributes['last_job_info']['requested_wallclock_time_seconds'] time_calculated = self.node.attributes['last_job_info']['wallclock_time_seconds'] if 0.97 * time_avail_sec < time_calculated: return self.exit_codes.ERROR_TIME_LIMIT except KeyError: pass if kb_used * mpiprocs / mem_kb_avail > 0.93 or \ any(phrase in error_file_lines for phrase in OUT_OF_MEMORY_PHRASES): return self.exit_codes.ERROR_NOT_ENOUGH_MEMORY if 'TIME LIMIT' in error_file_lines or 'time limit' in error_file_lines: return self.exit_codes.ERROR_TIME_LIMIT if 'Atom spills out into vacuum during relaxation' in error_file_lines: return self.exit_codes.ERROR_VACUUM_SPILL_RELAX if 'Error checking M.T. radii' in error_file_lines: return self.exit_codes.ERROR_MT_RADII if 'No solver linked for Hubbard 1' in error_file_lines: return self.exit_codes.ERROR_MISSING_DEPENDENCY.format(name='edsolver') if 'FLEUR is not linked against libxc' in error_file_lines: return self.exit_codes.ERROR_MISSING_DEPENDENCY.format(name='libxc') if 'Overlapping MT-spheres during relaxation: ' in error_file_lines: overlap_line = re.findall(r'\S+ +\S+ olap: +\S+', error_file_lines)[0].split() with output_folder.open('relax.xml', 'r') as rlx: schema_dict = InputSchemaDict.fromVersion('0.34') relax_dict = parse_relax_file(rlx, schema_dict) it_number = len(relax_dict['energies']) + 1 # relax.xml was not updated error_params = { 'error_name': 'MT_OVERLAP_RELAX', 'description': ('This output node contains information' 'about FLEUR error'), 'overlapped_indices': overlap_line[:2], 'overlaping_value': overlap_line[3], 'iteration_number': it_number } link_name = self.get_linkname_outparams() error_params = Dict(error_params) self.out('error_params', error_params) return self.exit_codes.ERROR_MT_RADII_RELAX if 'parent_folder' in calc.inputs: # problem in reusing cdn for relaxations, drop cdn if 'fleurinp' in calc.inputs: if 'relax.xml' in calc.inputs.fleurinp.files: return self.exit_codes.ERROR_DROP_CDN return self.exit_codes.ERROR_FLEUR_CALC_FAILED #Catch all exit code for an unknown failure return self.exit_codes.ERROR_FLEUR_CALC_FAILED # if a relax.xml was retrieved if FleurCalculation._RELAX_FILE_NAME in list_of_files: self.logger.info('relax.xml file found in retrieved folder') has_relax_file = True ####### Parse the files ######## if not has_xml_outfile: return self.exit_codes.ERROR_NO_OUTXML # open output file with output_folder.open(FleurCalculation._OUTXML_FILE_NAME, 'rb') as outxmlfile_opened: success = True parser_info = {} try: out_dict = outxml_parser(outxmlfile_opened, parser_info_out=parser_info, ignore_validation=True) except (ValueError, FileNotFoundError, KeyError) as exc: self.logger.error(f'XML output parsing failed: {str(exc)}') success = False # Call routines for output node creation if not success: self.logger.error('Parsing of XML output file was not successfull.') outxml_params = Dict(parser_info) link_name = self.get_linkname_outparams() self.out(link_name, outxml_params) return self.exit_codes.ERROR_XMLOUT_PARSING_FAILED if out_dict: outxml_params = Dict({**out_dict, **parser_info}) link_name = self.get_linkname_outparams() self.out(link_name, outxml_params) else: self.logger.error('Something went wrong, no out_dict found') outxml_params = Dict(parser_info) link_name = self.get_linkname_outparams() self.out(link_name, outxml_params) if has_relax_file: relax_name = FleurCalculation._RELAX_FILE_NAME try: fleurinp = calc.inputs.fleurinp except NotExistent: old_relax_text = '' else: if relax_name in fleurinp.list_object_names(): with fleurinp.open(relax_name, 'r') as rlx: old_relax_text = rlx.read() else: old_relax_text = '' inp_version = outxml_params.get_dict().get('input_file_version', '0.34') schema_dict = InputSchemaDict.fromVersion(inp_version) # dummy comparison between old and new relax with output_folder.open(relax_name, 'rb') as rlx: new_relax_text = rlx.read() if new_relax_text != old_relax_text: try: relax_dict = parse_relax_file(rlx, schema_dict) except etree.XMLSyntaxError: return self.exit_codes.ERROR_RELAX_PARSING_FAILED self.out('relax_parameters', relax_dict)
[docs]def parse_relax_file(relax_file, schema_dict): """ This function parsers relax.xml output file and returns a Dict containing all the data given there. """ from masci_tools.util.xml.xml_getters import get_relaxation_information relax_file.seek(0) tree = etree.parse(relax_file) out_dict = get_relaxation_information(tree, schema_dict) return Dict(out_dict)