# -*- coding: utf-8 -*-
"""
Created on Wed May 18 22:24:10 2016
@author: cjs14
adapted from from http://physics.bu.edu/~erikl/research/tools/crystals/read_cif.py
"""
import os
import math
import numpy as np
import pandas as pd
from .base import DataInput
[docs]class CIF(DataInput):
""" Build a crystal from a Crystallographic Information File (.cif)
"""
[docs] def setup_data(self, file_path, override_abc=[], ignore_overlaps=False):
""" Build a crystal from a Crystallographic Information File (.cif)
Parameters
-----------
file_path : str
path to file
override_abc : [] or [a,b,c]
if not empty, will override a, b, c length parameters given in file
Notes
-----
here is a typical example of a CIF file:
_cell_length_a 4.916
_cell_length_b 4.916
_cell_length_c 5.4054
_cell_angle_alpha 90
_cell_angle_beta 90
_cell_angle_gamma 120
_cell_volume 113.131
_exptl_crystal_density_diffrn 2.646
_symmetry_space_group_name_H-M 'P 32 2 1'
loop_
_space_group_symop_operation_xyz
'x,y,z'
'y,x,2/3-z'
'-y,x-y,2/3+z'
'-x,-x+y,1/3-z'
'-x+y,-x,1/3+z'
'x-y,-y,-z'
loop_
_atom_site_label
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
Si 0.46970 0.00000 0.00000
O 0.41350 0.26690 0.11910
"""
self._file_path = file_path
self._ignore_overlaps = ignore_overlaps
self._override_abc = override_abc
self._data_set = True
def _get_atom_data(self, step):
""" return atom data
"""
cif_data = self._read_cif_file(self._file_path)
if len(self._override_abc)==3:
cif_data['_cell_length_a'] = self._override_abc[0]
cif_data['_cell_length_b'] = self._override_abc[1]
cif_data['_cell_length_c'] = self._override_abc[2]
cif_data['_cell_volume'] = np.nan
atoms_df,origin,a,b,c = self._convert_cif_data(cif_data, self._ignore_overlaps)
self._add_colors(atoms_df)
self._add_radii(atoms_df)
return atoms_df
def _get_meta_data(self, step):
""" return pandas.Series of origin, a, b & c coordinates
"""
cif_data = self._read_cif_file(self._file_path)
if len(self._override_abc)==3:
cif_data['_cell_length_a'] = self._override_abc[0]
cif_data['_cell_length_b'] = self._override_abc[1]
cif_data['_cell_length_c'] = self._override_abc[2]
cif_data['_cell_volume'] = np.nan
atoms_df,origin,a,b,c = self._convert_cif_data(cif_data, self._ignore_overlaps)
return pd.Series([origin,a,b,c],index=['origin','a','b','c'])
def _count_configs(self):
""" return int of total number of atomic configurations """
return 1
#TODO read and deal with occupancy values (and overlapping atoms)
def _read_cif_file(self, file_path):
"""
Read CIF file, and extract the necessary info in the form of a dictionary.
E.g., the value of "_cell_volume" can be found with data['_cell_volume'].
"""
assert os.path.exists(file_path), '{0} does not exist'.format(file_path)
data = {}
# Open the CIF file.
with open(file_path, 'r') as f:
reading_sym_ops = False
atom_headers = []
# Read lines one by one.
for line in f:
# Split into columns.
cols = line.split()
if (len(cols) == 0): continue
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Identify the keyword. Here are the simply "key: value" ones.
if (cols[0] == '_cell_length_a'):
data['_cell_length_a'] = float(cols[1])
elif (cols[0] == '_cell_length_b'):
data['_cell_length_b'] = float(cols[1])
elif (cols[0] == '_cell_length_c'):
data['_cell_length_c'] = float(cols[1])
elif (cols[0] == '_cell_angle_alpha'):
data['_cell_angle_alpha'] = float(cols[1])
elif (cols[0] == '_cell_angle_beta'):
data['_cell_angle_beta'] = float(cols[1])
elif (cols[0] == '_cell_angle_gamma'):
data['_cell_angle_gamma'] = float(cols[1])
elif (cols[0] == '_cell_volume'):
data['_cell_volume'] = float(cols[1])
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Extract the symmetry operations. This will be a list of
# strings such as:
# ['x,y,z', 'y,x,2/3-z', '-y,x-y,2/3+z', '-x,-x+y,1/3-z', ... ]
elif (cols[0] == '_space_group_symop_operation_xyz'):
reading_sym_ops = True
data['_space_group_symop_operation_xyz'] = []
elif (reading_sym_ops):
# Add the operation if the string is between single quotes.
# Otherwise it's a sign we are done with the list.
if (cols[0][0] == '\'' and cols[0][-1] == '\''):
data['_space_group_symop_operation_xyz'].append(cols[0][1:-1])
else:
reading_sym_ops = False
# Note: it's safe to ignore this line completely.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Search for the keyword "_atom_site_label" which indicates the
# start of the atom_site data.
elif (cols[0] == '_atom_site_label') and not atom_headers:
while len(cols) < 4:
atom_headers.append(cols[0])
data[cols[0]] = []
line = self._skiplines(f)
cols = line.split()
# Stop reading atom sites if we found a line with fewer
# columns, and which does not start with '_atom_site_'.
while len(cols) == len(atom_headers):
for i, name in enumerate(atom_headers):
data[name].append(cols[i])
try:
line = self._skiplines(f)
except:
break
cols = line.split()
# Return the extracted data.
return data
def _extract_element(self, label):
"""Converts an "_atom_type_label" into an element name. """
elem2 = ['He','Li','Be','Ne','Na','Mg','Al','Si','Cl','Ar','Ca','Sc','Ti',
'Cr','Mn','Fe','Co','Ni','Cu','Zn','Ga','Ge','As','Se','Br','Kr',
'Rb','Sr','Zr','Nb','Mo','Tc','Ru','Rh','Pd','Ag','Cd','In','Sn',
'Sb','Te','Xe','Cs','Ba','La','Ce','Pr','Nd','Pm','Sm','Eu','Gd',
'Tb','Dy','Ho','Er','Tm','Yb','Lu','Hf','Ta','Re','Os','Ir','Pt',
'Au','Hg','Tl','Pb','Bi','Po','At','Rn','Fr','Ra','Ac','Th','Pa',
'Np','Pu','Am','Cm','Bk','Cf','Es','Fm','Md','No','Lr']
if (label[0:2] in elem2):
return label[0:2]
elem1 = ['H','B','C','N','O','F','P','S','K','V','Y','I','W','U']
if (label[0] in elem1):
return label[0]
raise Exception('WARNING: could not convert "%s" into element name!' % label)
return label
def _convert_cif_data(self, data, ignore_overlaps=False):
radians = math.radians
cos, sin = math.cos, math.sin
sqrt = math.sqrt
# Extract lengths and angles from the CIF data.
La = float(data['_cell_length_a'])
Lb = float(data['_cell_length_b'])
Lc = float(data['_cell_length_c'])
alpha = radians(float(data['_cell_angle_alpha']))
beta = radians(float(data['_cell_angle_beta']))
gamma = radians(float(data['_cell_angle_gamma']))
volume = float(data['_cell_volume'])
# Extract the symmetry operations. This will be a list of strings such as:
# ['x,y,z', 'y,x,2/3-z', '-y,x-y,2/3+z', '-x,-x+y,1/3-z', ... ]
ops = data['_space_group_symop_operation_xyz']
# For proper evaluation, we need to convert "2/3" into "2./3", etc. to prevent
# integer division which would turn e.g. 2/3 into 0.
for i in range(len(ops)):
ops[i] = ops[i].replace("/", "./")
# Get the atom labels and coordinates.
labels = data['_atom_site_label']
fX = [ float(s) for s in data['_atom_site_fract_x'] ]
fY = [ float(s) for s in data['_atom_site_fract_y'] ]
fZ = [ float(s) for s in data['_atom_site_fract_z'] ]
if '_atom_site_occupancy' in data:
occ = [ float(s) for s in data['_atom_site_occupancy'] ]
else:
occ = [ 1.0 for _ in data['_atom_site_label'] ]
# Create a list of 4-tuples, where each tuple is an atom:
# [ ('Si', 0.4697, 0.0, 0.0), ('O', 0.4135, 0.2669, 0.1191), ... ]
atoms = [ (labels[i], fX[i], fY[i], fZ[i], occ[i]) for i in range(len(labels)) ]
# Make sure that all atoms lie within the unit cell. Also convert names such
# as 'Oa1' into 'O'.
for i in range(len(atoms)):
(name,xn,yn,zn,oc) = atoms[i]
xn = (xn + 10.0) % 1.0
yn = (yn + 10.0) % 1.0
zn = (zn + 10.0) % 1.0
name = self._extract_element(name)
atoms[i] = (name,xn,yn,zn,oc)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Use symmetry operations to create the unit cell.
# The CIF file consists of a few atom positions plus several "symmetry
# operations" that indicate the other atom positions within the unit cell. So
# using these operations, create copies of the atoms until no new copies can be
# made.
# Two atoms are on top of each other if they are less than "eps" away.
eps = 0.01 # in Angstrom
# For each atom, apply each symmetry operation to create a new atom.
overlap_atoms = []
imax = len(atoms)
i=0
while (i < imax):
label,x,y,z,oc = atoms[i]
for op in ops:
# Python is awesome: calling e.g. eval('x,y,1./2+z') will convert the
# string into a 3-tuple using the current values for x,y,z!
xn,yn,zn = eval(op)
# Make sure that the new atom lies within the unit cell.
xn = (xn + 10.0) % 1.0
yn = (yn + 10.0) % 1.0
zn = (zn + 10.0) % 1.0
# Check if the new position is actually new, or the same as a previous
# atom.
new_atom = True
for at in atoms:
if (abs(at[1]-xn) < eps and abs(at[2]-yn) < eps and abs(at[3]-zn) < eps):
new_atom = False
# Check that this is the same atom type.
if (at[0] != label):
if at[4] + oc != 1:
raise Exception('overlapping atoms do not have occupancy summing to unity')
overlap_atoms.append((label,xn,yn,zn,oc))
# If the atom is new, add it to the list!
if (new_atom):
atoms.append( (label,xn,yn,zn,oc) ) # add a 4-tuple
# Update the loop iterator.
i = i + 1
imax = len(atoms)
if overlap_atoms and not ignore_overlaps:
raise Exception('invalid CIF file: atom of type %s overlaps with atom of type %s' % (at[0],label))
atoms+=overlap_atoms
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Convert the fractional coordinates into real coordinates.
# The primitive vectors a,b,c are such that
#
# cos(alpha) = b.c / |b||c|
# cos(beta) = a.c / |a||c|
# cos(gamma) = a.b / |a||b|
#
# with the convention
#
# a = La*xhat
# b = bx*xhat + by*yhat
# c = cx*xhat + cy*yhat + cz*zhat
#
cosa = cos(alpha)
#sina = sin(alpha)
cosb = cos(beta)
#sinb = sin(beta)
cosg = cos(gamma)
sing = sin(gamma)
cosa2 = cosa * cosa
cosb2 = cosb * cosb
sing2 = sing * sing
ax = La
bx = Lb * cosg
by = Lb * sing
cx = Lc * cosb
cy = Lc * (cosa - cosg*cosb) / sing
cz = Lc * sqrt( 1 - (cosa2 + cosb2 - 2*cosg*cosb*cosa) / sing2 )
# Use the volume to check if we did the vectors right.
V = ax*by*cz
if ( abs(V - volume) > 0.1) and not np.isnan(volume):
raise Exception('volume does not match that calculated from primitive vectors')
a = np.array([ax,0,0])
b = np.array([bx,by,0])
c = np.array([cx,cy,cz])
for i in range(len(atoms)):
# Get label and fractional coordinates.
label,xf,yf,zf,oc = atoms[i]
xa = xf * ax # contribution of a-vector to the x-coordinate of this atom
#ya = 0 # a-vector has no y-component, so does not affect y of atom
#za = 0 # a-vector has no z-component, so does not affect z of atom
xb = yf * bx # contribution of b-vector to the x-coordinate of this atom
yb = yf * by # contribution of b-vector to the y-coordinate of this atom
#zb = 0 # b-vector has no z-component, so does not affect z of atom
xc = zf * cx # contribution of c-vector to the x-coordinate of this atom
yc = zf * cy # contribution of c-vector to the y-coordinate of this atom
zc = zf * cz # contribution of c-vector to the z-coordinate of this atom
# Add all contributions.
xn = xa + xb + xc
yn = yb + yc
zn = zc
atoms[i] = (label, xn, yn, zn,oc)
df = pd.DataFrame(atoms, columns=['type', 'x', 'y', 'z','occupancy'])
return df,(0.,0.,0.),tuple(a),tuple(b),tuple(c)