Source code for ipymd.data_input.cif

# -*- coding: utf-8 -*-
"""
Created on Wed May 18 22:24:10 2016

@author: cjs14

adapted from from http://physics.bu.edu/~erikl/research/tools/crystals/read_cif.py
"""
import os
import math

import numpy as np
import pandas as pd
    
from .base import DataInput

[docs]class CIF(DataInput):
    """ Build a crystal from  a Crystallographic Information File (.cif)

    """ 
[docs]    def setup_data(self, file_path, override_abc=[], ignore_overlaps=False):
        """ Build a crystal from  a Crystallographic Information File (.cif)
    
        Parameters
        -----------
        file_path : str
            path to file
         override_abc : [] or [a,b,c]
             if not empty, will override a, b, c length parameters given in file
            
        Notes
        -----
        here is a typical example of a CIF file:
            
             _cell_length_a 4.916
             _cell_length_b 4.916
             _cell_length_c 5.4054
             _cell_angle_alpha 90
             _cell_angle_beta 90
             _cell_angle_gamma 120
             _cell_volume 113.131
             _exptl_crystal_density_diffrn      2.646
             _symmetry_space_group_name_H-M 'P 32 2 1'
             loop_
             _space_group_symop_operation_xyz
               'x,y,z'
               'y,x,2/3-z'
               '-y,x-y,2/3+z'
               '-x,-x+y,1/3-z'
               '-x+y,-x,1/3+z'
               'x-y,-y,-z'
             loop_
             _atom_site_label
             _atom_site_fract_x
             _atom_site_fract_y
             _atom_site_fract_z
             Si   0.46970   0.00000   0.00000
             O   0.41350   0.26690   0.11910

        """ 
        self._file_path = file_path
        self._ignore_overlaps = ignore_overlaps
        self._override_abc = override_abc
        self._data_set = True
    
    def _get_atom_data(self, step):
        """ return atom data 
        """
        cif_data = self._read_cif_file(self._file_path)
        if len(self._override_abc)==3:
            cif_data['_cell_length_a'] = self._override_abc[0]
            cif_data['_cell_length_b'] = self._override_abc[1]
            cif_data['_cell_length_c'] = self._override_abc[2]
            cif_data['_cell_volume'] = np.nan
            
        atoms_df,origin,a,b,c = self._convert_cif_data(cif_data, self._ignore_overlaps)
        self._add_colors(atoms_df)
        self._add_radii(atoms_df)
        return atoms_df
    
    def _get_meta_data(self, step):

        """ return pandas.Series of origin, a, b & c coordinates 
        """
        cif_data = self._read_cif_file(self._file_path)
        if len(self._override_abc)==3:
            cif_data['_cell_length_a'] = self._override_abc[0]
            cif_data['_cell_length_b'] = self._override_abc[1]
            cif_data['_cell_length_c'] = self._override_abc[2]
            cif_data['_cell_volume'] = np.nan

        atoms_df,origin,a,b,c = self._convert_cif_data(cif_data, self._ignore_overlaps)
        return pd.Series([origin,a,b,c],index=['origin','a','b','c'])

    def _count_configs(self):
        """ return int of total number of atomic configurations """
        return 1
        
    #TODO read and deal with occupancy values (and overlapping atoms)        
    def _read_cif_file(self, file_path):
        """
        
        Read CIF file, and extract the necessary info in the form of a dictionary.
        E.g., the value of "_cell_volume" can be found with data['_cell_volume'].
        
        """
        assert os.path.exists(file_path), '{0} does not exist'.format(file_path)
        
        data = {}
        
        # Open the CIF file.
        with open(file_path, 'r') as f:
    
            reading_sym_ops = False
            
            atom_headers = []
    
            # Read lines one by one.
            for line in f:
    
                # Split into columns.
                cols = line.split()
                if (len(cols) == 0): continue
    
                # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                # Identify the keyword.  Here are the simply "key: value" ones.
                if (cols[0] == '_cell_length_a'):
                    data['_cell_length_a'] = float(cols[1])
    
                elif (cols[0] == '_cell_length_b'):
                    data['_cell_length_b'] = float(cols[1])
    
                elif (cols[0] == '_cell_length_c'):
                    data['_cell_length_c'] = float(cols[1])
    
                elif (cols[0] == '_cell_angle_alpha'):
                    data['_cell_angle_alpha'] = float(cols[1])
    
                elif (cols[0] == '_cell_angle_beta'):
                    data['_cell_angle_beta'] = float(cols[1])
    
                elif (cols[0] == '_cell_angle_gamma'):
                    data['_cell_angle_gamma'] = float(cols[1])
    
                elif (cols[0] == '_cell_volume'):
                    data['_cell_volume'] = float(cols[1])
    
    
                # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                # Extract the symmetry operations.  This will be a list of
                # strings such as:
                #    ['x,y,z', 'y,x,2/3-z', '-y,x-y,2/3+z', '-x,-x+y,1/3-z', ... ]
                elif (cols[0] == '_space_group_symop_operation_xyz'):
                    reading_sym_ops = True
                    data['_space_group_symop_operation_xyz'] = []
    
                elif (reading_sym_ops):
                    
                    # Add the operation if the string is between single quotes.
                    # Otherwise it's a sign we are done with the list.
                    if (cols[0][0] == '\''  and  cols[0][-1] == '\''):
                        data['_space_group_symop_operation_xyz'].append(cols[0][1:-1])
                    else:
                        reading_sym_ops = False
                        # Note: it's safe to ignore this line completely.
    
    
                # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                # Search for the keyword "_atom_site_label" which indicates the
                # start of the atom_site data.
                elif (cols[0] == '_atom_site_label') and not atom_headers:
                    
                    while len(cols) < 4:
                        atom_headers.append(cols[0])
                        data[cols[0]] = []
                        line = self._skiplines(f)
                        cols = line.split()
                    
                    # Stop reading atom sites if we found a line with fewer
                    # columns, and which does not start with '_atom_site_'.
                    while len(cols) == len(atom_headers):
                        for i, name in enumerate(atom_headers):
                            data[name].append(cols[i])
                        
                        try:
                            line = self._skiplines(f)
                        except:
                            break
                        cols = line.split()
        
        # Return the extracted data.
        return data
        
    def _extract_element(self, label):
        """Converts an "_atom_type_label" into an element name. """
    
        elem2 = ['He','Li','Be','Ne','Na','Mg','Al','Si','Cl','Ar','Ca','Sc','Ti',
                 'Cr','Mn','Fe','Co','Ni','Cu','Zn','Ga','Ge','As','Se','Br','Kr',
                 'Rb','Sr','Zr','Nb','Mo','Tc','Ru','Rh','Pd','Ag','Cd','In','Sn',
                 'Sb','Te','Xe','Cs','Ba','La','Ce','Pr','Nd','Pm','Sm','Eu','Gd',
                 'Tb','Dy','Ho','Er','Tm','Yb','Lu','Hf','Ta','Re','Os','Ir','Pt',
                 'Au','Hg','Tl','Pb','Bi','Po','At','Rn','Fr','Ra','Ac','Th','Pa',
                 'Np','Pu','Am','Cm','Bk','Cf','Es','Fm','Md','No','Lr']
    
        if (label[0:2] in elem2):
            return label[0:2]
    
        elem1 = ['H','B','C','N','O','F','P','S','K','V','Y','I','W','U']
    
        if (label[0] in elem1):
            return label[0]
    
        raise Exception('WARNING: could not convert "%s" into element name!' % label)
        return label
    
    def _convert_cif_data(self, data, ignore_overlaps=False):
        
        radians = math.radians
        cos, sin = math.cos, math.sin
        sqrt = math.sqrt
                
        # Extract lengths and angles from the CIF data.
        La = float(data['_cell_length_a'])
        Lb = float(data['_cell_length_b'])
        Lc = float(data['_cell_length_c'])
        alpha = radians(float(data['_cell_angle_alpha']))
        beta = radians(float(data['_cell_angle_beta']))
        gamma = radians(float(data['_cell_angle_gamma']))
        volume = float(data['_cell_volume'])
        
        # Extract the symmetry operations.  This will be a list of strings such as:
        #    ['x,y,z', 'y,x,2/3-z', '-y,x-y,2/3+z', '-x,-x+y,1/3-z', ... ]
        ops = data['_space_group_symop_operation_xyz']
        
        # For proper evaluation, we need to convert "2/3" into "2./3", etc. to prevent
        # integer division which would turn e.g. 2/3 into 0.
        for i in range(len(ops)):
            ops[i] = ops[i].replace("/", "./")
        
        
        # Get the atom labels and coordinates.
        labels = data['_atom_site_label']
        fX = [ float(s) for s in data['_atom_site_fract_x'] ]
        fY = [ float(s) for s in data['_atom_site_fract_y'] ]
        fZ = [ float(s) for s in data['_atom_site_fract_z'] ]
        if '_atom_site_occupancy' in data:
            occ = [ float(s) for s in data['_atom_site_occupancy'] ]
        else:
            occ = [ 1.0 for _ in data['_atom_site_label'] ]
        
        # Create a list of 4-tuples, where each tuple is an atom:
        #   [ ('Si', 0.4697, 0.0, 0.0),  ('O', 0.4135, 0.2669, 0.1191),  ... ]
        atoms = [ (labels[i], fX[i], fY[i], fZ[i], occ[i]) for i in range(len(labels)) ]
        
        # Make sure that all atoms lie within the unit cell.  Also convert names such
        # as 'Oa1' into 'O'.
        for i in range(len(atoms)):
            (name,xn,yn,zn,oc) = atoms[i]
            xn = (xn + 10.0) % 1.0
            yn = (yn + 10.0) % 1.0
            zn = (zn + 10.0) % 1.0
            name = self._extract_element(name)
            atoms[i] = (name,xn,yn,zn,oc)
            
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Use symmetry operations to create the unit cell.
        
        
        # The CIF file consists of a few atom positions plus several "symmetry
        # operations" that indicate the other atom positions within the unit cell.  So
        # using these operations, create copies of the atoms until no new copies can be
        # made.
        
        
        # Two atoms are on top of each other if they are less than "eps" away.
        eps = 0.01  # in Angstrom
        
        
        # For each atom, apply each symmetry operation to create a new atom.
        overlap_atoms = []
        imax = len(atoms)
        i=0
        while (i < imax):
        
            label,x,y,z,oc = atoms[i]
        
            for op in ops:
        
                # Python is awesome: calling e.g. eval('x,y,1./2+z') will convert the
                # string into a 3-tuple using the current values for x,y,z!
                xn,yn,zn = eval(op)
        
                # Make sure that the new atom lies within the unit cell.
                xn = (xn + 10.0) % 1.0
                yn = (yn + 10.0) % 1.0
                zn = (zn + 10.0) % 1.0
        
                # Check if the new position is actually new, or the same as a previous
                # atom.
                new_atom = True
                for at in atoms:
                    if (abs(at[1]-xn) < eps  and  abs(at[2]-yn) < eps  and  abs(at[3]-zn) < eps):
                        new_atom = False
        
                        # Check that this is the same atom type.
                        if (at[0] != label):
                            if at[4] + oc != 1:
                                raise Exception('overlapping atoms do not have occupancy summing to unity')
                            overlap_atoms.append((label,xn,yn,zn,oc))
                                
                # If the atom is new, add it to the list!
                if (new_atom):
                    atoms.append( (label,xn,yn,zn,oc) )  # add a 4-tuple
        
        
            # Update the loop iterator.
            i = i + 1
            imax = len(atoms)

        if overlap_atoms and not ignore_overlaps:
            raise Exception('invalid CIF file: atom of type %s overlaps with atom of type %s' % (at[0],label))
        
        atoms+=overlap_atoms

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Convert the fractional coordinates into real coordinates.
        
        
        # The primitive vectors a,b,c are such that 
        #
        #   cos(alpha) = b.c / |b||c|
        #   cos(beta)  = a.c / |a||c|
        #   cos(gamma) = a.b / |a||b|
        #
        # with the convention
        #
        #   a = La*xhat
        #   b = bx*xhat + by*yhat
        #   c = cx*xhat + cy*yhat + cz*zhat
        #
        cosa = cos(alpha)
        #sina = sin(alpha)
        cosb = cos(beta)
        #sinb = sin(beta)
        cosg = cos(gamma)
        sing = sin(gamma)
        
        cosa2 = cosa * cosa
        cosb2 = cosb * cosb
        sing2 = sing * sing
        
        ax = La
        
        bx = Lb * cosg
        by = Lb * sing
        
        cx = Lc * cosb
        cy = Lc * (cosa - cosg*cosb) / sing
        cz = Lc * sqrt( 1 - (cosa2 + cosb2 - 2*cosg*cosb*cosa) / sing2 )
        
        # Use the volume to check if we did the vectors right.
        V = ax*by*cz
        if ( abs(V - volume) > 0.1) and not np.isnan(volume):
            raise Exception('volume does not match that calculated from primitive vectors')
        
        a = np.array([ax,0,0])
        b = np.array([bx,by,0])
        c = np.array([cx,cy,cz])
    
        for i in range(len(atoms)):
        
            # Get label and fractional coordinates.
            label,xf,yf,zf,oc = atoms[i]
        
            xa = xf * ax  # contribution of a-vector to the x-coordinate of this atom
            #ya = 0       # a-vector has no y-component, so does not affect y of atom
            #za = 0       # a-vector has no z-component, so does not affect z of atom
            
            xb = yf * bx  # contribution of b-vector to the x-coordinate of this atom
            yb = yf * by  # contribution of b-vector to the y-coordinate of this atom
            #zb = 0       # b-vector has no z-component, so does not affect z of atom
        
            xc = zf * cx  # contribution of c-vector to the x-coordinate of this atom
            yc = zf * cy  # contribution of c-vector to the y-coordinate of this atom
            zc = zf * cz  # contribution of c-vector to the z-coordinate of this atom
        
            # Add all contributions.
            xn = xa + xb + xc
            yn = yb + yc
            zn = zc
        
            atoms[i] = (label, xn, yn, zn,oc)
        
        df = pd.DataFrame(atoms, columns=['type', 'x', 'y', 'z','occupancy'])
    
        return df,(0.,0.,0.),tuple(a),tuple(b),tuple(c)