Computing SOAP featuresΒΆ
This examples shows how to compute the SOAP power spectrum descriptor for each atom in each system of a provided systems file. The path to the systems file is taken from the first command line argument.
In the end, the descriptor is transformed in a way compatible with how most classic machine learning (such as PCA or linear regression) work.
The workflow is the same for every provided descriptor. Take a look at the Reference guides for a list with all descriptors and their specific parameters.
You can obtain a testing dataset from our website
.
import chemfiles
from rascaline import SoapPowerSpectrum
Read systems using chemfiles. You can obtain the dataset used in this
example from our website
.
with chemfiles.Trajectory("dataset.xyz") as trajectory:
systems = [s for s in trajectory]
Rascaline can also handles systems read by ASE using
systems = ase.io.read("dataset.xyz", ":")
.
We can now define hyper parameters for the calculation
HYPER_PARAMETERS = {
"cutoff": 5.0,
"max_radial": 6,
"max_angular": 4,
"atomic_gaussian_width": 0.3,
"center_atom_weight": 1.0,
"radial_basis": {
"Gto": {},
},
"cutoff_function": {
"ShiftedCosine": {"width": 0.5},
},
}
calculator = SoapPowerSpectrum(**HYPER_PARAMETERS)
And then run the actual calculation, including gradients with respect to positions
descriptor = calculator.compute(systems, gradients=["positions"])
The descriptor is a metatensor TensorMap
, containing multiple blocks. We
can transform it to a single block containing a dense representation, with one
sample for each atom-centered environment by using keys_to_samples
and
keys_to_properties
print("before: ", len(descriptor.keys))
descriptor = descriptor.keys_to_samples("center_type")
descriptor = descriptor.keys_to_properties(["neighbor_1_type", "neighbor_2_type"])
print("after: ", len(descriptor.keys))
before: 40
after: 1
you can now use descriptor.block().values
as the input of a machine
learning algorithm
print(descriptor.block().values.shape)
(1380, 1800)
use metatensor::Labels;
use rascaline::{Calculator, System, CalculationOptions};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// load the systems from command line argument
let path = std::env::args().nth(1).expect("expected a command line argument");
let systems = rascaline::systems::read_from_file(path)?;
// transform systems into a vector of trait objects (`Vec<Box<dyn System>>`)
let mut systems = systems.into_iter()
.map(|s| Box::new(s) as Box<dyn System>)
.collect::<Vec<_>>();
// pass hyper-parameters as JSON
let parameters = r#"{
"cutoff": 5.0,
"max_radial": 6,
"max_angular": 4,
"atomic_gaussian_width": 0.3,
"center_atom_weight": 1.0,
"radial_basis": {
"Gto": {}
},
"cutoff_function": {
"ShiftedCosine": {"width": 0.5}
}
}"#;
// create the calculator with its name and parameters
let mut calculator = Calculator::new("soap_power_spectrum", parameters.to_owned())?;
// create the options for a single calculation, here we request the
// calculation of gradients with respect to positions
let options = CalculationOptions {
gradients: &["positions"],
..Default::default()
};
// run the calculation
let descriptor = calculator.compute(&mut systems, options)?;
// Transform the descriptor to dense representation, with one sample for
// each atom-centered environment, and the neighbor atomic types part of the
// properties
let keys_to_move = Labels::empty(vec!["center_type"]);
let descriptor = descriptor.keys_to_samples(&keys_to_move, /* sort_samples */ true)?;
let keys_to_move = Labels::empty(vec!["neighbor_1_type", "neighbor_2_type"]);
let descriptor = descriptor.keys_to_properties(&keys_to_move, /* sort_samples */ true)?;
// descriptor now contains a single block, which can be used as the input
// to standard ML algorithms
let values = descriptor.block_by_id(0).values().to_array();
println!("SOAP representation shape: {:?}", values.shape());
Ok(())
}
#include <iostream>
#include <rascaline.hpp>
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cout << "error: expected a command line argument" << std::endl;
return 1;
}
auto systems = rascaline::BasicSystems(argv[1]);
// pass hyper-parameters as JSON
const char* parameters = R"({
"cutoff": 5.0,
"max_radial": 6,
"max_angular": 4,
"atomic_gaussian_width": 0.3,
"center_atom_weight": 1.0,
"gradients": false,
"radial_basis": {
"Gto": {}
},
"cutoff_function": {
"ShiftedCosine": {"width": 0.5}
}
})";
// create the calculator with its name and parameters
auto calculator = rascaline::Calculator("soap_power_spectrum", parameters);
// run the calculation
auto descriptor = calculator.compute(systems);
// The descriptor is a metatensor `TensorMap`, containing multiple blocks.
// We can transform it to a single block containing a dense representation,
// with one sample for each atom-centered environment.
descriptor.keys_to_samples("center_type");
descriptor.keys_to_properties(std::vector<std::string>{"neighbor_1_type", "neighbor_2_type"});
// extract values from the descriptor in the only remaining block
auto block = descriptor.block_by_id(0);
auto values = block.values();
// you can now use values as the input of a machine learning algorithm
return 0;
}
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <rascaline.h>
#include <metatensor.h>
static mts_tensormap_t* move_keys_to_samples(mts_tensormap_t* descriptor, const char* keys_to_move[], size_t keys_to_move_len);
static mts_tensormap_t* move_keys_to_properties(mts_tensormap_t* descriptor, const char* keys_to_move[], size_t keys_to_move_len);
int main(int argc, char* argv[]) {
int status = RASCAL_SUCCESS;
rascal_calculator_t* calculator = NULL;
rascal_system_t* systems = NULL;
uintptr_t n_systems = 0;
double* values = NULL;
const uintptr_t* shape = NULL;
uintptr_t shape_count = 0;
bool got_error = true;
const char* keys_to_samples[] = {"center_type"};
const char* keys_to_properties[] = {"neighbor_1_type", "neighbor_2_type"};
// use the default set of options, computing all samples and all features,
// and including gradients with respect to positions
rascal_calculation_options_t options = {0};
const char* gradients_list[] = {"positions"};
options.gradients = gradients_list;
options.gradients_count = 1;
mts_tensormap_t* descriptor = NULL;
mts_block_t* block = NULL;
mts_array_t array = {0};
// hyper-parameters for the calculation as JSON
const char* parameters = "{\n"
"\"cutoff\": 5.0,\n"
"\"max_radial\": 6,\n"
"\"max_angular\": 4,\n"
"\"atomic_gaussian_width\": 0.3,\n"
"\"center_atom_weight\": 1.0,\n"
"\"gradients\": false,\n"
"\"radial_basis\": {\n"
" \"Gto\": {}\n"
"},\n"
"\"cutoff_function\": {\n"
" \"ShiftedCosine\": {\"width\": 0.5}\n"
"}\n"
"}";
// load systems from command line arguments
if (argc < 2) {
printf("error: expected a command line argument");
goto cleanup;
}
status = rascal_basic_systems_read(argv[1], &systems, &n_systems);
if (status != RASCAL_SUCCESS) {
printf("Error: %s\n", rascal_last_error());
goto cleanup;
}
// create the calculator with its name and parameters
calculator = rascal_calculator("soap_power_spectrum", parameters);
if (calculator == NULL) {
printf("Error: %s\n", rascal_last_error());
goto cleanup;
}
// run the calculation
status = rascal_calculator_compute(
calculator, &descriptor, systems, n_systems, options
);
if (status != RASCAL_SUCCESS) {
printf("Error: %s\n", rascal_last_error());
goto cleanup;
}
// The descriptor is a metatensor `TensorMap`, containing multiple blocks.
// We can transform it to a single block containing a dense representation,
// with one sample for each atom-centered environment.
descriptor = move_keys_to_samples(descriptor, keys_to_samples, 1);
if (descriptor == NULL) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
descriptor = move_keys_to_properties(descriptor, keys_to_properties, 2);
if (descriptor == NULL) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
// extract the unique block and corresponding values from the descriptor
status = mts_tensormap_block_by_id(descriptor, &block, 0);
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
status = mts_block_data(block, &array);
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
// callback the functions on the mts_array_t to extract the shape/data pointer
status = array.shape(array.ptr, &shape, &shape_count);
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
status = array.data(array.ptr, &values);
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
assert(shape_count == 2);
// you can now use `values` as the input of a machine learning algorithm
printf("the value array shape is %lu x %lu\n", shape[0], shape[1]);
got_error = false;
cleanup:
mts_tensormap_free(descriptor);
rascal_calculator_free(calculator);
rascal_basic_systems_free(systems, n_systems);
if (got_error) {
return 1;
} else {
return 0;
}
}
mts_tensormap_t* move_keys_to_samples(mts_tensormap_t* descriptor, const char* keys_to_move[], size_t keys_to_move_len) {
mts_labels_t keys = {0};
mts_tensormap_t* moved_descriptor = NULL;
keys.names = keys_to_move;
keys.size = keys_to_move_len;
keys.values = NULL;
keys.count = 0;
moved_descriptor = mts_tensormap_keys_to_samples(descriptor, keys, true);
mts_tensormap_free(descriptor);
return moved_descriptor;
}
mts_tensormap_t* move_keys_to_properties(mts_tensormap_t* descriptor, const char* keys_to_move[], size_t keys_to_move_len) {
mts_labels_t keys = {0};
mts_tensormap_t* moved_descriptor = NULL;
keys.names = keys_to_move;
keys.size = keys_to_move_len;
keys.values = NULL;
keys.count = 0;
moved_descriptor = mts_tensormap_keys_to_properties(descriptor, keys, true);
mts_tensormap_free(descriptor);
return moved_descriptor;
}