Source code for ipymd.atom_analysis.nearest_neighbour

# -*- coding: utf-8 -*-
"""
Created on Thu Jul 14 14:05:09 2016

@author: cjs14

functions based on nearest neighbour calculations

"""
import math
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree
from collections import Counter
from IPython.core.display import clear_output
import matplotlib.patches as mpatches

from .. import shared
from ..atom_manipulation import Atom_Manipulation
from ..plotting import Plotter

def _createTreeFromEdges(edges):
    """    
    e.g. _createTreeFromEdges([[1,2],[0,1],[2,3],[8,9],[0,3]])
     -> {0: [1], 1: [2, 0], 2: [1, 3], 3: [2,0], 8: [9], 9: [8]}
    """
    tree = {}
    for v1, v2 in edges:
        tree.setdefault(v1, []).append(v2)
        tree.setdefault(v2, []).append(v1)
    return tree
    
def _longest_path(start,tree,lastnode=None):
    """a recursive function to compute the maximum unbroken chain given a tree
    
    e.g. start=0, tree={0: [1], 1: [2, 0], 2: [1, 3], 3: [2,0], 8: [9], 9: [8]}
     -> [0, 1, 2, 3, 0]
    
    """
    if not start in tree:
        return []
    new_tree = tree.copy()
    #nodes = new_tree.pop(start) # can use if don't want to complete loops
    nodes = new_tree[start]
    new_tree[start] = []
        
    path = []
    for node in nodes:        
        if node==lastnode:
            continue # can't go back to lastnode, e.g. 1->2->1
        new_path = _longest_path(node,new_tree,start)
        if len(new_path) > len(path):
            path = new_path
    path.append(start)
    return path    

[docs]def guess_bonds(atoms_df, covalent_radii=None, threshold=0.1, max_length=5., radius=0.1,transparency=1.,color=None): """ guess bonds between atoms, based on approximate covalent radii Parameters ---------- atoms_df : pandas.Dataframe all atoms, requires colums ['x','y','z','type', 'color'] covalent_radii : dict or None a dict of covalent radii for each atom type, if None then taken from ipymd.shared.atom_data threshold : float include bonds with distance +/- threshold of guessed bond length (Angstrom) max_length : float maximum bond length to include (Angstrom) radius : float radius of displayed bond cylinder (Angstrom) transparency : float transparency of displayed bond cylinder color : str or tuple color of displayed bond cylinder, if None then colored by atom color Returns ------- bonds_df : pandas.Dataframe a dataframe with start/end indexes relating to atoms in atoms_df """ if atoms_df.index.tolist() != [_ for _ in range(atoms_df.shape[0])]: raise ValueError('the index for atoms_df must be in order, i.e. [0,1,2,...]') if covalent_radii is None: df = shared.atom_data() covalent_radii = df.RCov.to_dict() r_array = atoms_df[['x','y','z']].values ck = cKDTree(r_array) pairs = ck.query_pairs(max_length) bonds = [] for i,j in pairs: a, b = covalent_radii[atoms_df.iloc[i].type], covalent_radii[atoms_df.iloc[j].type] rval = a + b thr_a = rval - threshold thr_b = rval + threshold #thr_a2 = thr_a * thr_a thr_b2 = thr_b * thr_b dr2 = ((r_array[i] - r_array[j])**2).sum() # print(dr2) if dr2 < thr_b2: if color is None: bonds.append((i, j,math.sqrt(dr2),radius, atoms_df.iloc[i].color,atoms_df.iloc[j].color,transparency)) else: bonds.append((i, j,math.sqrt(dr2),radius, color,color,transparency)) return pd.DataFrame(bonds, columns=['start','end','length','radius','color_start','color_end','transparency'])
[docs]def bond_lengths(atoms_df, coord_type, lattice_type, max_dist=4, max_coord=16, repeat_meta=None, rounded=2, min_dist=0.01, leafsize=100): """ calculate the unique bond lengths atoms in coords_atoms, w.r.t lattice_atoms atoms_df : pandas.Dataframe all atoms coord_type : string atoms to calcualte coordination of lattice_type : string atoms to act as lattice for coordination max_dist : float maximum distance for coordination consideration max_coord : float maximum possible coordination number repeat_meta : pandas.Series include consideration of repeating boundary idenfined by a,b,c in the meta data min_dist : float lattice points within this distance of the atom will be ignored (assumed self-interaction) leafsize : int points at which the algorithm switches to brute-force (kdtree specific) Returns ------- distances : set list of unique distances """ if not coord_type in atoms_df.type.values or not lattice_type in atoms_df.type.values: return set([]) coord_df = Atom_Manipulation(atoms_df,repeat_meta) coord_df.filter_variables(coord_type) lattice_df = Atom_Manipulation(atoms_df,repeat_meta) lattice_df.filter_variables(lattice_type) if repeat_meta is not None: lattice_df.repeat_cell((-1,1),(-1,1),(-1,1)) lattice_tree = cKDTree(lattice_df.df[['x','y','z']].values, leafsize=leafsize) all_dists,all_ids = lattice_tree.query(coord_df.df[['x','y','z']].values, k=max_coord, distance_upper_bound=max_dist) distances = [] for dists in all_dists: for d in dists: if d > min_dist and not np.isinf(d): distances.append(round(d,rounded)) return sorted(set(distances))
[docs]def coordination(coord_atoms_df, lattice_atoms_df, max_dist=4, max_coord=16, repeat_meta=None, min_dist=0.01, leafsize=100): """ calculate the coordination number of each atom in coords_atoms, w.r.t lattice_atoms coords_atoms_df : pandas.Dataframe atoms to calcualte coordination of lattice_atoms_df : pandas.Dataframe atoms to act as lattice for coordination max_dist : float maximum distance for coordination consideration max_coord : float maximum possible coordination number repeat_meta : pandas.Series include consideration of repeating boundary idenfined by a,b,c in the meta data min_dist : float lattice points within this distance of the atom will be ignored (assumed self-interaction) leafsize : int points at which the algorithm switches to brute-force (kdtree specific) Returns ------- coords : list list of coordination numbers """ lattice_df = Atom_Manipulation(lattice_atoms_df,repeat_meta) if repeat_meta is not None: lattice_df.repeat_cell((-1,1),(-1,1),(-1,1)) lattice_tree = cKDTree(lattice_df.df[['x','y','z']].values, leafsize=leafsize) all_dists,all_ids = lattice_tree.query(coord_atoms_df[['x','y','z']].values, k=max_coord, distance_upper_bound=max_dist) coords = [] for dists in all_dists: coords.append(np.count_nonzero(np.logical_and(dists>min_dist, dists<np.inf))) return coords
[docs]def coordination_bytype(atoms_df, coord_type, lattice_type, max_dist=4, max_coord=16, repeat_meta=None, min_dist=0.01, leafsize=100): """ returns dataframe with additional column for the coordination number of each atom of coord type, w.r.t lattice_type atoms effectively an extension of calc_df_coordination atoms_df : pandas.Dataframe all atoms coord_type : string atoms to calcualte coordination of lattice_type : string atoms to act as lattice for coordination max_dist : float maximum distance for coordination consideration max_coord : float maximum possible coordination number repeat_meta : pandas.Series include consideration of repeating boundary idenfined by a,b,c in the meta data min_dist : float lattice points within this distance of the atom will be ignored (assumed self-interaction) leafsize : int points at which the algorithm switches to brute-force (kdtree specific) Returns ------- df : pandas.Dataframe copy of atoms_df with new column named coord_{coord_type}_{lattice_type} """ df = atoms_df.copy() df['coord_{0}_{1}'.format(coord_type, lattice_type)] = np.nan if not coord_type in df.type.values or not lattice_type in df.type.values: return df coord_df = Atom_Manipulation(df) coord_df.filter_variables(coord_type) lattice_df = Atom_Manipulation(df) lattice_df.filter_variables(lattice_type) coords = coordination(coord_df.df,lattice_df.df,max_dist, max_coord, repeat_meta, min_dist, leafsize) df.loc[df['type']==coord_type,'coord_{0}_{1}'.format(coord_type, lattice_type)] = coords return df
[docs]def compare_to_lattice(atoms_df, lattice_atoms_df, max_dist=10,leafsize=100): """ calculate the minimum distance of each atom in atoms_df from a lattice point in lattice_atoms_df atoms_df : pandas.Dataframe atoms to calculate for lattice_atoms_df : pandas.Dataframe atoms to act as lattice points max_dist : float maximum distance for consideration in computation leafsize : int points at which the algorithm switches to brute-force (kdtree specific) Returns ------- distances : list list of distances to nearest atom in lattice """ lattice_tree = cKDTree(lattice_atoms_df[['x','y','z']].values, leafsize=leafsize) dists,idnums = lattice_tree.query(atoms_df[['x','y','z']].values, k=1, distance_upper_bound=max_dist) return dists
[docs]def vacancy_identification(atoms_df, res=0.2, nn_dist=2., repeat_meta=None, remove_dups=True, color='red',transparency=1.,radius=1, type_name='Vac', leafsize=100, n_jobs=1, ipython_progress=False, ): """ identify vacancies atoms_df : pandas.Dataframe atoms to calculate for res : float resolution of vacancy identification, i.e. spacing of reference lattice nn_dist : float maximum nearest-neighbour distance considered as a vacancy repeat_meta : pandas.Series include consideration of repeating boundary idenfined by a,b,c in the meta data remove_dups : bool only keep one vacancy site within the nearest-neighbour distance leafsize : int points at which the algorithm switches to brute-force (kdtree specific) n_jobs : int, optional Number of jobs to schedule for parallel processing. If -1 is given all processors are used. ipython_progress : bool print progress to IPython Notebook Returns ------- vac_df : pandas.DataFrame new atom dataframe of vacancy sites as atoms """ xmin, xmax = atoms_df.x.min(),atoms_df.x.max() ymin, ymax = atoms_df.y.min(),atoms_df.y.max() zmin, zmax = atoms_df.z.min(),atoms_df.z.max() xyz = np.mgrid[xmin:xmax:res, ymin:ymax:res, zmin:zmax:res].reshape(3,-1).T if repeat_meta is not None: repeat = Atom_Manipulation(atoms_df,repeat_meta) repeat.repeat_cell((-1,1),(-1,1),(-1,1),original_first=True) lattice_df = repeat.df else: lattice_df = atoms_df if ipython_progress: clear_output() print('creating nearest neighbour tree') lattice_tree = cKDTree(lattice_df[['x','y','z']].values, leafsize=leafsize) if ipython_progress: clear_output() print('assessing nearest neighbours') dists,idnums = lattice_tree.query(xyz, k=1, distance_upper_bound=nn_dist,n_jobs=n_jobs) vac_list = [] for atom,dist in zip(xyz,dists): if np.isinf(dist): x,y,z = atom vac_list.append([type_name,x,y,z,radius,color,transparency]) df = pd.DataFrame(vac_list,columns=['type','x','y','z','radius','color','transparency']) if remove_dups and df.shape[0]>0: vac_tree = cKDTree(df[['x','y','z']].values) pairs = np.asarray(list(vac_tree.query_pairs(nn_dist))) #drop first atom of each pair if pairs.shape[0] > 0: df.drop(pairs[:,0],inplace=True) if ipython_progress: clear_output() return df
#TODO group atoms into specified molecules e.g. S2 or CaCO3 # http://chemwiki.ucdavis.edu/Textbook_Maps/Inorganic_Chemistry_Textbook_Maps/Map%3A_Inorganic_Chemistry_(Wikibook)/Chapter_08%3A_Ionic_and_Covalent_Solids_-_Structures/8.2%3A_Structures_related_to_NaCl_and_NiAs # maybe supply central atom type(s) and 'other' atoms type(s), filter df by required atom types, # then find nearest neighbours of central (removing molecule each time) # create molecule x,y,z from average of central atoms #http://www.ovito.org/manual/particles.modifiers.common_neighbor_analysis.html #https://www.quora.com/Given-a-set-of-atomic-types-and-coordinates-from-an-MD-simulation-is-there-a-good-algorithm-for-determining-its-likely-crystal-structure?__filter__=all&__nsrc__=2&__snid3__=179254150 # http://iopscience.iop.org/article/10.1088/0965-0393/20/4/045021/pdf
[docs]def common_neighbour_analysis(atoms_df, upper_bound=4, max_neighbours=24, repeat_meta=None, leafsize=100, ipython_progress=False): """ compute atomic environment of each atom in atoms_df Based on Faken, Daniel and Jónsson, Hannes, 'Systematic analysis of local atomic structure combined with 3D computer graphics', March 1994, DOI: 10.1016/0927-0256(94)90109-0 ideally: - FCC = 12 x 4,2,1 - HCP = 6 x 4,2,1 & 6 x 4,2,2 - BCC = 6 x 6,6,6 & 8 x 4,4,4 - icosahedral = 12 x 5,5,5 Paramaters ---------- repeat_meta : pandas.Series include consideration of repeating boundary idenfined by a,b,c in the meta data ipython_progress : bool print progress to IPython Notebook Returns ------- df : pandas.Dataframe copy of atoms_df with new column named cna """ df = atoms_df.copy() max_id = df.shape[0] - 1 # starts at 0 if repeat_meta is not None: repeat = Atom_Manipulation(df,repeat_meta) repeat.repeat_cell((-1,1),(-1,1),(-1,1),original_first=True) lattice_df = repeat.df else: lattice_df = df if ipython_progress: print('creating nearest neighbours dictionary') # create nearest neighbours dictionary lattice_tree = cKDTree(lattice_df[['x','y','z']].values, leafsize=leafsize) all_dists,all_ids = lattice_tree.query(lattice_df[['x','y','z']].values, k=max_neighbours+1, distance_upper_bound=upper_bound) nn_ids = {} #nn_dists = {} for dists,ids in zip(all_dists,all_ids): mask = np.logical_and(dists>0.01, dists<np.inf) # assume first id is of that atom, i.e. dists[0]==0 assert dists[0]==0, dists nn_ids[ids[0]] = ids[mask] #nn_dists[ids[0]] = dists[mask] jkls = {} for lid, nns in nn_ids.iteritems(): if lid > max_id: continue if ipython_progress: clear_output() print('assessing nearest neighbours: {0} of {1}'.format(lid,max_id)) jkls[lid] = [] for nn in nns: # j is number of shared nearest neighbours common_nns = set(nn_ids[nn]).intersection(nns) j = len(common_nns) # k is number of bonds between nearest neighbours nn_bonds = [] for common_nn in common_nns: for nn_bond in set(nn_ids[common_nn]).intersection(common_nns): if sorted((common_nn, nn_bond)) not in nn_bonds: nn_bonds.append(sorted((common_nn, nn_bond))) k = len(nn_bonds) # l is longest chain of nearest neighbour bonds tree = _createTreeFromEdges(nn_bonds) chain_lengths = [0] for node in tree.iterkeys(): chain_lengths.append(len(_longest_path(node, tree))-1) l = max(chain_lengths) jkls[lid].append('{0},{1},{2}'.format(j,k,l)) jkls[lid] = Counter(jkls[lid]) df['cna'] = [jkls[key] for key in sorted(jkls)] if ipython_progress: clear_output() return df
def _equala(i, j, accuracy): return j*accuracy <= i <= j+j*(1-accuracy)
[docs]def cna_categories(atoms_df, accuracy=1., upper_bound=4, max_neighbours=24, repeat_meta=None, leafsize=100, ipython_progress=False): """ compute summed atomic environments of each atom in atoms_df Based on Faken, Daniel and Jónsson, Hannes, 'Systematic analysis of local atomic structure combined with 3D computer graphics', March 1994, DOI: 10.1016/0927-0256(94)90109-0 signatures: - FCC = 12 x 4,2,1 - HCP = 6 x 4,2,1 & 6 x 4,2,2 - BCC = 6 x 6,6,6 & 8 x 4,4,4 - Diamond = 12 x 5,4,3 & 4 x 6,6,3 - Icosahedral = 12 x 5,5,5 Parameters ---------- accuracy : float 0 to 1 how accurate to fit to signature repeat_meta : pandas.Series include consideration of repeating boundary idenfined by a,b,c in the meta data ipython_progress : bool print progress to IPython Notebook Returns ------- df : pandas.Dataframe copy of atoms_df with new column named cna """ df = common_neighbour_analysis(atoms_df, upper_bound, max_neighbours, repeat_meta, leafsize=leafsize, ipython_progress=ipython_progress) cnas = df.cna.values atype = [] for counter in cnas: if _equala(counter['4,2,1'],6,accuracy) and _equala(counter['4,2,2'],6,accuracy): atype.append('HCP') elif _equala(counter['4,2,1'],12,accuracy): atype.append('FCC') elif _equala(counter['6,6,6'],8,accuracy) and _equala(counter['4,4,4'],6,accuracy): atype.append('BCC') elif _equala(counter['5,4,3'],12,accuracy) and _equala(counter['6,6,3'],4,accuracy): atype.append('Diamond') elif _equala(counter['5,5,5'],12,accuracy): atype.append('Icosahedral') else: atype.append('Other') df.cna = atype return df
[docs]def cna_sum(atoms_df, upper_bound=4, max_neighbours=24, repeat_meta=None, leafsize=100, ipython_progress=False): """ compute summed atomic environments of each atom in atoms_df Based on Faken, Daniel and Jónsson, Hannes, 'Systematic analysis of local atomic structure combined with 3D computer graphics', March 1994, DOI: 10.1016/0927-0256(94)90109-0 common signatures: - FCC = 12 x 4,2,1 - HCP = 6 x 4,2,1 & 6 x 4,2,2 - BCC = 6 x 6,6,6 & 8 x 4,4,4 - Diamond = 12 x 5,4,3 & 4 x 6,6,3 - Icosahedral = 12 x 5,5,5 Parameters ---------- repeat_meta : pandas.Series include consideration of repeating boundary idenfined by a,b,c in the meta data ipython_progress : bool print progress to IPython Notebook Returns ------- counter : Counter a counter of cna signatures """ df = common_neighbour_analysis(atoms_df, upper_bound, max_neighbours, repeat_meta, leafsize=leafsize, ipython_progress=ipython_progress) cnas = df.cna.values return sum(cnas,Counter())
#TODO move plotting to plotting module
[docs]def cna_plot(atoms_df, upper_bound=4, max_neighbours=24, repeat_meta=None, leafsize=100, barwidth=1, ipython_progress=False): """ compute summed atomic environments of each atom in atoms_df Based on Faken, Daniel and Jónsson, Hannes, 'Systematic analysis of local atomic structure combined with 3D computer graphics', March 1994, DOI: 10.1016/0927-0256(94)90109-0 common signatures: - FCC = 12 x 4,2,1 - HCP = 6 x 4,2,1 & 6 x 4,2,2 - BCC = 6 x 6,6,6 & 8 x 4,4,4 - Diamond = 12 x 5,4,3 & 4 x 6,6,3 - Icosahedral = 12 x 5,5,5 Parameters ---------- repeat_meta : pandas.Series include consideration of repeating boundary idenfined by a,b,c in the meta data ipython_progress : bool print progress to IPython Notebook Returns ------- plot : matplotlib.pyplot a matplotlib plot """ df = common_neighbour_analysis(atoms_df, upper_bound, max_neighbours, repeat_meta, leafsize=leafsize, ipython_progress=ipython_progress) cnas = df.cna.values counter = sum(cnas,Counter()) labels, values = zip(*counter.items()) indexes = np.arange(len(labels)) colors = [] patches = [] d = {'4,2,1':['orange','FCC or HCP (1 of 2)'], '4,2,2':['red','HCP (1 of 2)'], '6,6,6':['green','BCC (1 of 2)'], '4,4,4':['green','BCC (2 of 2)'], '5,5,5':['purple','Icosahedral'], '5,4,3':['grey','Diamond (1 of 2)'], '6,6,3':['grey','Diamond (1 of 2)']} for label in labels: if label in d: colors.append(d[label][0]) patches.append(mpatches.Patch(color=d[label][0], label=d[label][1])) else: colors.append('blue') plot = Plotter() plot.axes.barh(indexes, values, barwidth, color=colors) plot.axes.set_yticks(indexes + barwidth * 0.5, labels) plot.axes.grid(True) if patches: plot.axes.legend(handles=patches) plot.axes.set_ylabel('i,j,k') return plot
#TODO _group_molecules needs work def _group_molecules(atom_df,moltypes,maxdist=3,repeat_meta=None, mean_xyz=True,remove_atoms=True, color='red',transparency=1.,radius=1., leafsize=100): molname = ''.join(['{}_{}'.format(k,v) for k,v in Counter(moltypes).iteritems()]) search_df = atom_df[atom_df.type.isin([moltypes[0]])].copy() #old_index = search_df.index search_df.reset_index(inplace=True) if repeat_meta is not None: manip = Atom_Manipulation(search_df,repeat_meta) manip.repeat_cell((-1,1), (-1,1), (-1, 1),original_first=True) lattice_df = manip.df lattice_df.reset_index(inplace=True,drop=True) rep_map = dict(zip(range(lattice_df.shape[0]),list(search_df.index)*27)) else: lattice_df = search_df.copy() rep_map = dict(zip(range(search_df.shape[0]),list(search_df.index))) lattice_tree = cKDTree(lattice_df[['x','y','z']].values, leafsize=leafsize) dists,idnums = lattice_tree.query(search_df[['x','y','z']].values, k=len(moltypes), distance_upper_bound=maxdist) mol_data = [] used_repeat = [] for i,dist,idnum in zip(search_df.index, dists,idnums): #print i, old_index[i], dist, [rep_map[m] for m in idnum] if i in used_repeat: continue mol = [i] for j, d in zip(idnum[1:],dist[1:]): if not j in mol and not np.isinf(d) and not rep_map.get(j) in used_repeat: #used_repeat.append(rep_map[j]) mol.append(j) else: print('warning incomplete molecule')#, idnum, dist, j, rep_map.get(j) repeat_mol = [rep_map[m] for m in mol] if len(mol) == len(moltypes): used_repeat.extend(repeat_mol) if mean_xyz: x,y,z = search_df.loc[repeat_mol,['x','y','z']].mean().values else: x,y,z = search_df.loc[i,['x','y','z']].values mol_data.append([molname,x,y,z,radius,color,transparency]) #print i, repeat_mol df = atom_df.copy() if remove_atoms: df.drop(used_repeat, inplace=True) if mol_data: moldf = pd.DataFrame(mol_data,columns=['type','x','y','z','radius','color','transparency']) df = pd.concat([df,moldf]) return df