Source code for ipymd.atom_manipulation

# -*- coding: utf-8 -*-
"""
Created on Mon May 16 08:15:13 2016

@author: cjs14
"""
import pandas as pd
import numpy as np
from scipy.spatial import ConvexHull
from matplotlib import cm
from matplotlib.colors import Normalize
from six import string_types

from .shared import atom_data
from .shared.transformations import transform_to_crystal, rotate_vectors

[docs]class Atom_Manipulation(object): """ a class to manipulate atom data atom_df : pandas.DataFrame containing columns; x, y, z, type """ def __init__(self, atom_df, meta_series=None,undos=1): """ a class to manipulate atom data atom_df : pandas.DataFrame containing columns; x, y, z, type meta_series : pandas.Series containing columns; origin, a, b, c to define unit cell if none it will be constructed from the min/max x, y, z values undos : int number of past dataframes to save """ assert set(atom_df.columns).issuperset(['x','y','z','type']) assert undos > 0 if meta_series is None: meta_series = pd.Series([(atom_df.x.min(),atom_df.y.min(),atom_df.z.min()), (atom_df.x.max()-atom_df.x.min(),0.,0.), (0.,atom_df.y.max()-atom_df.y.min(),0.), (0.,0.,atom_df.z.max()-atom_df.z.min())], index=['origin','a','b','c']) else: assert set(meta_series.index).issuperset(['origin','a','b','c']) self._atom_df = atom_df.copy() #the one on the left is the newest self._old_atom_df = [None]*undos self._original_atom_df = atom_df.copy() self._meta = meta_series.copy() self._old_meta = [None]*undos self._original_meta = meta_series.copy() @property def df(self): return self._atom_df.copy() @property def meta(self): return self._meta.copy() def _save(self): self._old_atom_df.pop() # remove the oldest (right) self._old_atom_df.insert(0,self._atom_df.copy()) # add newest left self._old_meta.pop() # remove the oldest (right) self._old_meta.insert(0,self._meta.copy()) # add newest left
[docs] def undo_last(self): if self._old_atom_df[0] is not None: self._atom_df = self._old_atom_df.pop(0) # get newest left self._old_atom_df.append(None) self._meta = self._old_meta.pop(0) # get newest left self._old_meta.append(None) else: raise Exception('No previous dataframes')
[docs] def revert_to_original(self): """ revert to original atom_df """ self._save() self._atom_df = self._original_atom_df.copy() self._meta = self._original_meta.copy()
[docs] def change_variables(self, map_dict, vtype='type'): """ change particular variables according to the map_dict """ self._save() self._atom_df.replace({vtype:map_dict}, inplace=True)
[docs] def change_type_variable(self, atom_type, variable, value, type_col='type'): """ change particular variable for one atom type """ self._save() if not hasattr(value, '__iter__'): # there is df.loc, df.at or df.set_value, not sure which ones best self._atom_df.set_value(self._atom_df[type_col]==atom_type, variable, value) else: self._atom_df[variable] = self._atom_df[variable].astype(object) df = self._atom_df # ensure all indexes are unique df.index.name = 'old_index' df.reset_index(drop=False,inplace=True) for indx in df.loc[df[type_col]==atom_type].index: df.set_value(indx,variable,value) self._atom_df = df.set_index('old_index')
[docs] def apply_map(self, vmap, column, default=False, type_col='type'): """ change values in a column, according to a mapping of another column Properties ---------- vmap : dict or str A dictionary mapping values, or a string associated with a column in the ipymd.shared.atom_data() dataframe (e.g. color and RVdW) column : str the column to change default : various the default value to put when the type key cannot be found, if False then the original value will not be overwritten """ if isinstance(vmap, string_types): df = atom_data() vmap = df[vmap].dropna().to_dict() self._atom_df = self._atom_df.copy() if default is not False: self._atom_df[column] = default for key, val in vmap.iteritems(): self.change_type_variable(key, column, val, type_col)
[docs] def color_by_index(self, cmap='jet', minv=None, maxv=None): """change colors to map index values cmap : string the colormap to apply, see available at http://matplotlib.org/examples/color/colormaps_reference.html minv, maxv : float optional min, max cmap value, otherwise take min, max value found in column """ colormap = cm.get_cmap(cmap) var = self._atom_df.index minval = var.min() if minv is None else minv maxval = var.max() if maxv is None else maxv norm = Normalize(minval, maxval,clip=True) self._save() self._atom_df.color = [tuple(col[:3]) for col in colormap(norm(var),bytes=True)]
[docs] def color_by_variable(self, colname, cmap='jet', minv=None, maxv=None): """change colors to map colname : string a coloumn of the dataframe that contains numbers by which to color cmap : string the colormap to apply, see available at http://matplotlib.org/examples/color/colormaps_reference.html minv, maxv : float optional min, max cmap value, otherwise take min, max value found in column """ colormap = cm.get_cmap(cmap) var = self._atom_df[colname] minval = var.min() if minv is None else minv maxval = var.max() if maxv is None else maxv norm = Normalize(minval, maxval,clip=True) self._save() self._atom_df.color = [tuple(col[:3]) for col in colormap(norm(var),bytes=True)]
[docs] def color_by_categories(self, colname, cmap='jet', sort=True): """change colors to map colname : string a column of the dataframe that contains categories by which to color cmap : string the colormap to apply, see available at http://matplotlib.org/examples/color/colormaps_reference.html """ colormap = cm.get_cmap(cmap) cats = self._atom_df[colname] unique_cats = cats.unique() if sort: unique_cats = sorted(unique_cats) num_cats = float(cats.nunique()) #potential way to always have a string the same color #for cat in unique_cats: #[(ord(c)-97)/122. for c in cat.lower() if ord(c)>=97 and ord(c)<=122] color_dict = dict([(cat,colormap(i/num_cats,bytes=True)[:3]) for i,cat in enumerate(unique_cats)]) self._save() self._atom_df.color = cats.map(color_dict)
[docs] def filter_variables(self, values, vtype='type'): if isinstance(values, int): values = [values] if isinstance(values, float): values = [values] if isinstance(values, string_types): values = [values] self._save() self._atom_df = self._atom_df[self._atom_df[vtype].isin(values)]
#------------------------ # Geometric manipulation
[docs] def repeat_cell(self,a=1,b=1,c=1,original_first=False): """ repeat atoms along a, b, c directions (and update unit cell) a : int or tuple repeats in 'a' direction, if tuple then defines repeats in -/+ direction b : int or tuple repeats in 'b' direction, if tuple then defines repeats in -/+ direction c : int or tuple repeats in 'c' direction, if tuple then defines repeats in -/+ direction original_first: bool if True, the original atoms will be first in the DataFrame """ if isinstance(a, int): arange = range(0,abs(a)+1) else: arange = range(-abs(a[0]),abs(a[1])+1) if isinstance(b, int): brange = range(0,abs(b)+1) else: brange = range(-abs(b[0]),abs(b[1])+1) if isinstance(c, int): crange = range(0,abs(c)+1) else: crange = range(-abs(c[0]),abs(c[1])+1) avec = np.asarray(self._meta.a) bvec = np.asarray(self._meta.b) cvec = np.asarray(self._meta.c) dfs = [] if original_first: dfs.append(self._atom_df.copy()) for i in arange: for j in brange: for k in crange: if i==0 and j==0 and k==0 and original_first: continue atom_copy = self._atom_df.copy() atom_copy[['x','y','z']] = (atom_copy[['x','y','z']] + i*avec + j*bvec + k*cvec) dfs.append(atom_copy) self._save() self._atom_df = pd.concat(dfs) #TODO check for identical atoms and warn origin = np.asarray(self._meta.origin) self._meta.origin = tuple(origin + arange[0]*avec + brange[0]*bvec + crange[0]*cvec) self._meta.a = tuple(avec*len(arange)) self._meta.b = tuple(bvec*len(brange)) self._meta.c = tuple(cvec*len(crange))
# def _convert_basis_abc(self, xyz): # """ convert x,y,z to a,b,c basis # # xyz : numpy.array((N,3)) # """ # origin = np.asarray(self._meta.origin) # avec = np.asarray(self._meta.a) # bvec = np.asarray(self._meta.b) # cvec = np.asarray(self._meta.c) # # lattparams_bb(np.asarray([self._meta.a,self._meta.b,self._meta.c])) # # # move to origin # xyz = xyz - origin # # # convert basis # basis = np.array([avec,bvec,cvec]).T # invbasis = np.linalg.inv(basis) # # abc = np.dot(invbasis,xyz.T).T # # return abc # def _convert_basis_xyz(self, abc): # """ convert a,b,c to x,y,z basis # # abc : numpy.array((N,3)) # """ # origin = np.asarray(self._meta.origin) # avec = np.asarray(self._meta.a) # bvec = np.asarray(self._meta.b) # cvec = np.asarray(self._meta.c) # # # convert basis # basis = np.array([avec,bvec,cvec]).T # xyz = np.dot(basis,abc.T).T # # # move from origin # xyz = xyz + origin # # return xyz
[docs] def slice_fraction(self, amin=0, amax=1, bmin=0, bmax=1, cmin=0, cmax=1, incl_max=False, update_uc=True,delta=0.01): """ slice along a,b,c directions (from origin) as fraction of vector length incl_max : bool whether to slice < (False) <= (True) max values update_uc : bool update unit cell (a,b,c,origin) to match slice delta : float retain atoms within 'delta' fraction outside of slice plane) """ self._save() abc = transform_to_crystal(self._atom_df[['x','y','z']].values, self._meta.a,self._meta.b,self._meta.c, self._meta.origin) if incl_max: mask = ((abc[:,0]>=amin-delta) & (abc[:,0]<=amax+delta) & (abc[:,1]>=bmin-delta) & (abc[:,1]<=bmax+delta) & (abc[:,2]>=cmin-delta) & (abc[:,2]<=cmax+delta)) else: mask = ((abc[:,0]>=amin-delta) & (abc[:,0]<amax+delta) & (abc[:,1]>=bmin-delta) & (abc[:,1]<bmax+delta) & (abc[:,2]>=cmin-delta) & (abc[:,2]<cmax+delta)) self._atom_df = self._atom_df[mask] if update_uc: origin = np.asarray(self._meta.origin) avec = np.asarray(self._meta.a) bvec = np.asarray(self._meta.b) cvec = np.asarray(self._meta.c) self._meta.origin = tuple(origin+amin*avec+bmin*bvec+cmin*cvec) self._meta.a = avec * (amax-amin) self._meta.b = bvec * (bmax-bmin) self._meta.c = cvec * (cmax-cmin)
[docs] def slice_absolute(self, amin=0, amax=None, bmin=0, bmax=None, cmin=0, cmax=None, incl_max=False, update_uc=True, delta=0.01): """ slice along a,b,c directions (from origin) given absolute vector length if amax, bmax or cmax is None, then will use the vector length update_uc : bool update unit cell (a,b,c,origin) to match slice incl_max : bool whether to slice < (False) <= (True) max values delta : float retain atoms within 'delta' fraction outside of slice plane) """ self._save() abc = transform_to_crystal(self._atom_df[['x','y','z']].values, self._meta.a,self._meta.b,self._meta.c, self._meta.origin) anorm = np.linalg.norm(self._meta.a) bnorm = np.linalg.norm(self._meta.b) cnorm = np.linalg.norm(self._meta.c) amax = anorm if amax is None else amax bmax = anorm if bmax is None else bmax cmax = anorm if cmax is None else cmax if incl_max: mask = ((abc[:,0]>=(amin/anorm)-delta) & (abc[:,0]<=(amax/anorm)+delta) & (abc[:,1]>=(bmin/bnorm)-delta) & (abc[:,1]<=(bmax/bnorm)+delta) & (abc[:,2]>=(cmin/cnorm)-delta) & (abc[:,2]<=(cmax/cnorm)+delta)) else: mask = ((abc[:,0]>=(amin/anorm)-delta) & (abc[:,0]<(amax/anorm)+delta) & (abc[:,1]>=(bmin/bnorm)-delta) & (abc[:,1]<(bmax/bnorm)+delta) & (abc[:,2]>=(cmin/cnorm)-delta) & (abc[:,2]<(cmax/cnorm)+delta)) self._atom_df = self._atom_df[mask] if update_uc: origin = np.asarray(self._meta.origin) avec = np.asarray(self._meta.a) bvec = np.asarray(self._meta.b) cvec = np.asarray(self._meta.c) self._meta.origin = tuple(origin+ amin*avec/anorm + bmin*bvec/bnorm + cmin*cvec/cnorm) self._meta.a = avec * (amax-amin)/anorm self._meta.b = bvec * (bmax-bmin)/bnorm self._meta.c = cvec * (cmax-cmin)/cnorm
[docs] def translate(self, vector, update_uc=True): """translate atoms by vector vector : list x, y, z translation update_uc : bool update unit cell (a,b,c,origin) to match translation """ x,y,z = vector self._save() self._atom_df.x += x self._atom_df.y += y self._atom_df.z += z if update_uc: self._meta.origin = np.array(self._meta.origin) + np.asarray(vector)
[docs] def rotate(self, angle, vector=[1,0,0], update_uc=True): """rotate the clockwise about the given axis vector by theta degrees. e.g. for rotate_atoms(90,[0,0,1]); [0,1,0] -> [1,0,0] angle : float rotation angle in degrees vector : iterable vector to rotate around [x0,y0,z0] update_uc : bool update unit cell (a,b,c,origin) to match rotation """ coords = self._atom_df[['x','y','z']].values new_coords = rotate_vectors(coords,vector,angle) self._save() self._atom_df[['x','y','z']] = new_coords if update_uc: self._meta.a = tuple(rotate_vectors(self._meta.a,vector,angle)[0]) self._meta.b = tuple(rotate_vectors(self._meta.b,vector,angle)[0]) self._meta.c = tuple(rotate_vectors(self._meta.c,vector,angle)[0])
def _pnts_in_pointcloud(self, points, new_pts): """2D or 3D returns np.array(dtype=bool) """ hull = ConvexHull(points) vol = hull.volume inside = [] for pt in new_pts: new_hull = ConvexHull(np.append(points, [pt],axis=0)) inside.append(vol == new_hull.volume) return np.array(inside)
[docs] def filter_inside_pts(self, points): """return only atoms inside the bounding shape of a set of points points : np.array((N,3)) """ self._save() inside = self._pnts_in_pointcloud(points, self._atom_df[['x','y','z']].values) self._save() self._atom_df = self._atom_df[inside]
[docs] def filter_inside_box(self, vectors, origin=np.zeros(3)): """return only atoms inside box vectors : np.array((3,3)) a, b, c vectors origin : np.array((1,3)) """ a,b,c = vectors + origin points = [origin, a, b, a+b, c, a+c, b+c, a+b+c] self.filter_inside_pts(points)
[docs] def filter_inside_hexagon(self, vectors, origin=np.zeros(3)): """return only atoms inside hexagonal prism vectors : np.array((2,3)) a, c vectors origin : np.array((1,3)) """ a, c = vectors points = [self._rotate(a, c, angle) for angle in [0,60,120,180,240,300]] points += [p + c for p in points] points = np.array(points) + origin self.filter_inside_pts(points)
[docs] def group_atoms_as_mols(self, atom_ids, name, remove_atoms=True, mean_xyz=True, color='red',transparency=1.,radius=1.): """ combine atoms into a molecule atom_ids : list of lists list of dataframe indexes for each molecule name : string name of molecule remove_atoms : bool remove the grouped atoms from the dataframe mean_xyz : bool use the mean coordinate of atoms for molecule, otherwise use coordinate of first atom """ df = self._atom_df.copy() # test no atoms in multiple molecules all_atoms = np.asarray(atom_ids).flatten() assert len(set(all_atoms))==len(all_atoms), 'atoms in multiple molecules' mol_data = [] for atoms in atom_ids: if mean_xyz: x,y,z = df.loc[atoms,['x','y','z']].mean().values else: x,y,z = df.loc[atoms[0],['x','y','z']].values mol_data.append([name,x,y,z,radius,color,transparency]) if remove_atoms: df.drop(atoms, inplace=True) if mol_data: moldf = pd.DataFrame(mol_data,columns=['type','x','y','z','radius','color','transparency']) df = pd.concat([df,moldf]) self._save() self._atom_df = df