Monday, April 15, 2013

How to deploy an Elasticsearch cluster easily

Here is a simple shell script that deploys Elasticsearch on multiple servers, each with a dedicated role: master, slave or monitor.

-Master: can be an Elasticsearch master, acts as load balancer on the cluster, doesn't store data and can use the http transport.
-Slave: a data node, can not be an Elasticsearch master and can not use the http transport.
-Monitor: doesn't store data, can not be an Elasticsearch master, holds plugins and can use the http transport. By default, uses the paramedic plugin.

#!/bin/bash
#
# Deploys an Elasticsearch cluster over ssh (as root) on three kinds of nodes:
#   - masters : master-eligible, no data, http enabled
#   - slaves  : data nodes, not master-eligible, http disabled
#   - monitors: no data, not master-eligible, http enabled, paramedic plugin installed
# Every host is first wiped and re-provisioned with the requested Elasticsearch
# version and the service wrapper, then configured and started role by role
# (masters first, then slaves, then monitors).

slaves=("ip2" "ip3" "ip5")
monitors=("ip4")
masters=("ip1")

install_path=/home/user/opt/elasticsearch
cluster_name="Our Search"
index_number_of_shards=20
path_data=/data/d1/index
version=0.20.6

# Values written to set.default.ES_HEAP_SIZE in the service wrapper configuration
es_heap_master=3000
es_heap_monitor=2000
es_heap_slave=3000

#let's do it!
nodes=("${slaves[@]}" "${monitors[@]}" "${masters[@]}")

echo "Preparing cluster $cluster_name: ${nodes[@]}"

# Step 1: stop any running instance, wipe the install directory and reinstall
# Elasticsearch plus the service wrapper on every host.
for node in "${nodes[@]}"
do
  echo "Preparing node $node"
  # The whole multi-line string is a single remote command list; variables are
  # expanded locally before being sent.
  ssh "root@$node" "$install_path/current/bin/service/elasticsearch stop
rm -rf $install_path; 
mkdir -p $install_path; 
cd $install_path; 
wget -t 7 --waitretry=14 --random-wait -q http://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${version}.tar.gz; 
tar -xf $install_path/elasticsearch-${version}.tar.gz; 
ln -s $install_path/elasticsearch-${version} $install_path/current;
cd $install_path/current/bin;
wget -t 7 --waitretry=14 --random-wait -q https://github.com/elasticsearch/elasticsearch-servicewrapper/archive/master.zip;
unzip -q master.zip;
mv elasticsearch-servicewrapper-master/service . ;
rm -rf elasticsearch-servicewrapper* master.zip*";
  echo "Node $node ready!"
done

gateway_expected_nodes=$((${#slaves[@]} + ${#monitors[@]} + ${#masters[@]}))

# Builds the quoted, comma-separated list of master hosts ("ip1","ip2",...);
# the leading comma produced by printf is stripped on the second line.
discovery_zen_ping_unicast_hosts=$(printf ",\"%s\"" "${masters[@]}")
discovery_zen_ping_unicast_hosts=${discovery_zen_ping_unicast_hosts:1}

# Writes elasticsearch.yml on one host and patches the service wrapper config.
#   $1 host          ssh target (connected to as root)
#   $2 node name     value of node.name
#   $3 node.data     true|false
#   $4 http.enabled  true|false
#   $5 node.master   true|false
#   $6 heap size     value for set.default.ES_HEAP_SIZE
configure_node() {
  local host="$1" node_name="$2" node_data="$3" http_enabled="$4" node_master="$5" heap_size="$6"
  local wrapper_conf="$install_path/current/bin/service/elasticsearch.conf"

  # The YAML lines must stay at column 0: they are echoed verbatim on the remote host.
  ssh "root@$host" "echo 'node.name: $node_name
node.data: $node_data
http.enabled: $http_enabled
node.master: $node_master
bootstrap.mlockall: true
index.number_of_shards: $index_number_of_shards
cluster.name: $cluster_name
path.data: $path_data
transport.tcp.compress: true
gateway.expected_nodes: $gateway_expected_nodes
discovery.zen.ping.multicast.enabled: false
discovery.zen.ping.unicast.hosts: [$discovery_zen_ping_unicast_hosts]' > $install_path/current/config/elasticsearch.yml"

  ssh "root@$host" "sed -i 's#^\(set.default.ES_HOME\s*=\s*\).*\$#\1$install_path/current#' $wrapper_conf"
  ssh "root@$host" "sed -i 's#^\(set.default.ES_HEAP_SIZE\s*=\s*\).*\$#\1$heap_size#' $wrapper_conf"
}

# Starts Elasticsearch through the service wrapper on one host ($1).
start_node() {
  ssh "root@$1" "$install_path/current/bin/service/elasticsearch start"
}

#for master
for master in "${masters[@]}"
do
  echo "Installing master $master"
  configure_node "$master" "master ($master)" false true true "$es_heap_master"

  echo "Master $master installed, starting..."
  start_node "$master"
  echo "Master $master started!"
done

#for slaves
for slave in "${slaves[@]}"
do
  echo "Installing slave $slave"
  configure_node "$slave" "data ($slave)" true false false "$es_heap_slave"

  echo "Slave $slave installed, starting..."
  start_node "$slave"
  echo "Slave $slave started!"
done

#for monitors
for monitor in "${monitors[@]}"
do
  echo "Installing monitor $monitor"

  # Monitors additionally get the paramedic plugin before being configured.
  ssh "root@$monitor" "$install_path/current/bin/plugin -install karmi/elasticsearch-paramedic"

  configure_node "$monitor" "monitor ($monitor)" false true false "$es_heap_monitor"

  echo "Monitor $monitor installed, starting..."
  start_node "$monitor"
  echo "Monitor $monitor started!"
done

Tuesday, April 9, 2013

Transfer files from Hadoop to a remote server via ssh

When working with Hadoop, you produce files in HDFS. To copy them to one of your remote servers, you first have to use the get or the copyToLocal command to copy the files to your local filesystem, and then use an scp command. But this two-step process is not really efficient, since you copy the files twice.

sshj is a pure Java implementation of SSHv2 allowing you to connect to an sshd server and use port forwarding, X11 forwarding, file transfer, etc... But sshj can't read or write in hdfs...

Fortunately, the SCPFileTransfer class (http://schmizz.net/sshj/javadoc/0.5.0/net/schmizz/sshj/xfer/scp/SCPFileTransfer.html) allows you to upload from your own LocalSourceFile implementation. So let's do it:



import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;

import net.schmizz.sshj.xfer.LocalFileFilter;
import net.schmizz.sshj.xfer.LocalSourceFile;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * A {@link LocalSourceFile} backed by a Hadoop {@link FileSystem} entry, so
 * that sshj can upload a file or directory tree straight from HDFS without an
 * intermediate copy on the local disk.
 */
public class FileSystemSourceFile implements LocalSourceFile {

    /** File system the entry is read from. */
    private final FileSystem fs;

    /** Status (path, length, permissions, timestamps) of the wrapped entry. */
    private final FileStatus rootStatus;

    /**
     * Wraps the entry found at {@code path}.
     *
     * @param fs   file system to read from
     * @param path path of the file or directory to expose
     * @throws IOException if the status of {@code path} cannot be fetched
     */
    public FileSystemSourceFile(FileSystem fs, String path) throws IOException {
        this.fs = fs;
        this.rootStatus = fs.getFileStatus(new Path(path));
    }

    /**
     * Wraps an entry whose status has already been fetched.
     *
     * @param fs         file system to read from
     * @param rootStatus status of the file or directory to expose
     * @throws IOException never thrown by this constructor; kept in the
     *                     signature so existing callers keep compiling
     */
    public FileSystemSourceFile(FileSystem fs, FileStatus rootStatus)
            throws IOException {
        this.fs = fs;
        this.rootStatus = rootStatus;
    }

    /** @return the last component of the wrapped path */
    public String getName() {
        return rootStatus.getPath().getName();
    }

    /** @return the length reported by the file status */
    public long getLength() {
        return rootStatus.getLen();
    }

    /**
     * Opens the wrapped path for reading.
     *
     * @return a new stream on the entry; the caller is responsible for closing it
     * @throws IOException if the path cannot be opened
     */
    public InputStream getInputStream() throws IOException {
        return fs.open(rootStatus.getPath());
    }

    /** @return the permission bits of the entry, as returned by {@code FsPermission.toShort()} */
    public int getPermissions() throws IOException {
        return rootStatus.getPermission().toShort();
    }

    public boolean isFile() {
        return rootStatus.isFile();
    }

    public boolean isDirectory() {
        return rootStatus.isDirectory();
    }

    /**
     * Lists the direct children of the wrapped directory.
     *
     * @param filter optional filter; when non-null, only the children it
     *               accepts are returned
     * @return the (possibly empty) accepted children
     * @throws IOException if the directory cannot be listed
     */
    public Iterable<? extends LocalSourceFile> getChildren(
            LocalFileFilter filter) throws IOException {

        final List<FileSystemSourceFile> children = new LinkedList<FileSystemSourceFile>();

        final FileStatus[] statuses = fs.listStatus(rootStatus.getPath());
        if (statuses == null) {
            // Defensive: some FileSystem implementations may return null
            // instead of an empty array when nothing can be listed.
            return children;
        }

        for (FileStatus status : statuses) {
            final FileSystemSourceFile child = new FileSystemSourceFile(fs, status);
            // The filter used to be silently ignored; honour it when supplied.
            if (filter == null || filter.accept(child)) {
                children.add(child);
            }
        }
        return children;
    }

    /** @return {@code true}: access and modification times are read from the file status */
    public boolean providesAtimeMtime() {
        return true;
    }

    /** @return the access time of the file status divided by 1000 */
    public long getLastAccessTime() throws IOException {
        return rootStatus.getAccessTime() / 1000;
    }

    /** @return the modification time of the file status divided by 1000 */
    public long getLastModifiedTime() throws IOException {
        return rootStatus.getModificationTime() / 1000;
    }

}


And that's all! Now you can use it:


String host = ...;
String username = ...;
String password = ...;
String basePath = ...;
String hdfsPathSource = ...;
// The password is deliberately left out of the log message: credentials must
// never end up in log files.
LOGGER.debug("Copy export to {}@{}:{}", new Object[] { username, host,
        basePath });

SSHClient ssh = new SSHClient();
try {
    // Connection setup lives inside the try block so the client is also
    // closed when connecting or authenticating fails.
    ssh.useCompression();
    ssh.loadKnownHosts();
    ssh.connect(host);
    ssh.authPassword(username, password);

    // Streams the HDFS entry (file or directory) directly to the remote host.
    SCPFileTransfer transfert = ssh.newSCPFileTransfer();
    transfert.upload(new FileSystemSourceFile(fs, hdfsPathSource),
            basePath);
} finally {
    ssh.close();
}

More infos here:
https://github.com/shikhar/sshj
http://schmizz.net/sshj/javadoc/0.5.0/net/schmizz/sshj/xfer/scp/SCPFileTransfer.html
http://hadoop.apache.org/docs/r0.18.1/hdfs_shell.html#get

Thursday, March 14, 2013

Deploy an application on AppFog using Cloudfoundry vmc tool or Maven plugin

AppFog is a simple Cloud Platform for web applications. It's built on Cloud Foundry, the Open Platform as a Service Project. Like the Cloud Foundry command-line interface (known as 'vmc'), AppFog offers a CLI tool, 'af' based on CloudFoundry's 'vmc' but includes features specific to AppFog and has the default target set to AppFog's service.

But if you want to use 'vmc' with the AppFog Service or the Cloudfoundry Maven plugin, you will be rejected when creating your application with something like: Error 701: The URIs: "your app name".appfog.com have already been taken or reserved.

This is because AppFog needs to know which underlying infrastructure you have chosen from this list:

-AWS US East - Virginia
-AWS EU West - Ireland
-AWS Asia SE - Singapore
-Rackspace AZ 1 - Dallas
-HP AZ 2 - Las Vegas
-MS Azure AZ 1 - San Antonio

This can be done with the --infra parameters, but this is only available for the 'af' CLI tool, not for 'vmc'.

So, instead of specifying your application's deployed URL with .appfog.com as the target base, you have to choose the right target base for your infrastructure:

-AWS US East - Virginia -> .aws.af.cm
-AWS EU West - Ireland -> .eu01.aws.af.cm
-AWS Asia SE - Singapore -> .ap01.aws.af.cm
-Rackspace AZ 1 - Dallas -> .rs.af.cm
-HP AZ 2 - Las Vegas -> .hp.af.cm

On the command line you will get, for instance, for AWS EU West (note the .eu01.aws.af.cm deployed URL):

vincentdevillers@alienware: vmc push

Would you like to deploy from the current directory?

Application Name: my-little-blog 
Detected a Standalone Application, is this correct? [Yn]: 
1: java
2: node04
3: node06
4: node08
5: php
6: python2
7: ruby18
8: ruby192
9: ruby193
Select Runtime [java]: 
Selected java
Start Command: bin/start.sh 
Application Deployed URL [None]: my-little-blog.eu01.aws.af.cm
Memory reservation (128M, 256M, 512M, 1G, 2G) [512M]: 2G
How many instances? [1]: 
Create services to bind to 'my-little-blog'? [yN]: 
Would you like to save this configuration? [yN]: 
Creating Application: OK
Uploading Application:
  Checking for available resources: OK
  Processing resources: OK
  Packing application: OK
  Uploading (11M): OK 
Push Status: OK
Staging Application 'my-little-blog': OK                                   
Starting Application 'my-little-blog': OK 


And for the Maven plugin you will have, for instance, for AWS US East:


<!-- Cloud Foundry Maven plugin configured to deploy to AppFog -->
<plugin>
    <groupId>org.cloudfoundry</groupId>
    <artifactId>cf-maven-plugin</artifactId>
    <version>1.0.0.M4</version>
    <configuration>
        <server>mycloudfoundry-instance</server>
        <!-- AppFog API endpoint instead of the default Cloud Foundry target -->
        <target>https://api.appfog.com</target>
        <!-- Application URL: must use the target base of the chosen infrastructure
             (.aws.af.cm is AWS US East), not .appfog.com -->
        <url>hello-java-maven.aws.af.cm</url>
        <memory>256</memory>
    </configuration>
</plugin>

That's all!

More infos here:
- https://github.com/cloudfoundry/vcap-java-client/tree/master/cloudfoundry-maven-plugin
- blog.appfog.com/how-to-use-appfogs-multi-infrastructure-architecture-in-the-command-line/
https://www.appfog.com/

Sunday, February 24, 2013

A better Spring RestTemplate!

The RestTemplate is the central class for client-side HTTP access. It simplifies communication with HTTP servers, and enforces RESTful principles. It handles HTTP connections, leaving application code to provide URLs (with possible template variables) and extract results.

The template uses a ClientHttpRequestFactory for creating HTTP connections. The default ClientHttpRequestFactory is a SimpleClientHttpRequestFactory that uses standard J2SE classes like java.net.HttpURLConnection. And this is not really production ready... (why?) 

So, the solution is to use the org.springframework.http.client.HttpComponentsClientHttpRequestFactory, which is a ClientHttpRequestFactory delegating the creation of the requests to an HttpClient.

The result is:


import java.util.List;

import javax.inject.Inject;

import org.apache.http.HttpHost;
import org.apache.http.client.HttpClient;
import org.apache.http.conn.routing.HttpRoute;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.params.CoreConnectionPNames;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.client.ClientHttpRequestFactory;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.http.converter.HttpMessageConverter;
import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter;
import org.springframework.web.client.RestTemplate;

import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Spring configuration exposing a {@link RestTemplate} backed by a pooled
 * Apache {@link HttpClient} instead of the default
 * {@code java.net.HttpURLConnection} based request factory.
 */
@Configuration
public class HttpConfig {

    /** Upper bound of connections kept by the pool, all routes included. */
    private static final int DEFAULT_MAX_TOTAL_CONNECTIONS = 100;

    /** Upper bound of connections per route, unless overridden below. */
    private static final int DEFAULT_MAX_CONNECTIONS_PER_ROUTE = 5;

    /** Maximum time to wait for a connection to be established. */
    private static final int DEFAULT_CONNECT_TIMEOUT_MILLISECONDS = (60 * 1000);

    /** Maximum time to wait for data on an established connection. */
    private static final int DEFAULT_READ_TIMEOUT_MILLISECONDS = (60 * 1000);

    /** Per-route limit applied to the hosts listed in {@link #HIGH_TRAFFIC_HOSTS}. */
    private static final int HIGH_TRAFFIC_MAX_CONNECTIONS_PER_ROUTE = 20;

    /** Hosts that get a larger per-route connection limit. */
    private static final String[] HIGH_TRAFFIC_HOSTS = { "facebook.com",
            "twitter.com", "linkedin.com", "viadeo.com" };

    @Inject
    private ObjectMapper objectMapper;

    /** @return a request factory delegating to the pooled {@link #httpClient()} */
    @Bean
    public ClientHttpRequestFactory httpRequestFactory() {
        return new HttpComponentsClientHttpRequestFactory(httpClient());
    }

    /**
     * @return a {@link RestTemplate} using {@link #httpRequestFactory()} whose
     *         Jackson 2 converters share the injected {@link ObjectMapper}
     */
    @Bean
    public RestTemplate restTemplate() {
        RestTemplate restTemplate = new RestTemplate(httpRequestFactory());
        List<HttpMessageConverter<?>> converters = restTemplate
                .getMessageConverters();

        for (HttpMessageConverter<?> converter : converters) {
            if (converter instanceof MappingJackson2HttpMessageConverter) {
                MappingJackson2HttpMessageConverter jsonConverter = (MappingJackson2HttpMessageConverter) converter;
                jsonConverter.setObjectMapper(objectMapper);
            }
        }

        return restTemplate;
    }

    /**
     * @return an {@link HttpClient} backed by a pooling connection manager,
     *         with explicit connect and read timeouts
     */
    @Bean
    public HttpClient httpClient() {

        PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager();
        HttpClient defaultHttpClient = new DefaultHttpClient(connectionManager);

        connectionManager.setMaxTotal(DEFAULT_MAX_TOTAL_CONNECTIONS);
        connectionManager
                .setDefaultMaxPerRoute(DEFAULT_MAX_CONNECTIONS_PER_ROUTE);
        // NOTE(review): HttpHost(String) uses the default scheme and port, so
        // these limits presumably do not cover https or sub-domain routes -
        // verify against the URLs actually requested.
        for (String host : HIGH_TRAFFIC_HOSTS) {
            connectionManager.setMaxPerRoute(
                    new HttpRoute(new HttpHost(host)),
                    HIGH_TRAFFIC_MAX_CONNECTIONS_PER_ROUTE);
        }

        defaultHttpClient.getParams().setIntParameter(
                CoreConnectionPNames.CONNECTION_TIMEOUT,
                DEFAULT_CONNECT_TIMEOUT_MILLISECONDS);
        // The read timeout constant used to be applied to CONNECTION_TIMEOUT
        // only, leaving socket reads unbounded; SO_TIMEOUT is the read timeout.
        defaultHttpClient.getParams().setIntParameter(
                CoreConnectionPNames.SO_TIMEOUT,
                DEFAULT_READ_TIMEOUT_MILLISECONDS);
        return defaultHttpClient;
    }
}

Use Cloudfoundry in Spring without the dedicated XML namespace

Spring allows you to configure a dead simple connection on a provisioned service, like mysql, redis, rabbitmq and many others. It's really simple, since you don't even need to configure explicit credentials and connection strings. Instead, you can retrieve a reference to this service from the cloud itself using the CloudFoundry “cloud” namespace.

Problem is, you have to create an XML file, and we have more and more projects without XML anymore. So, how to use the Cloudfoundry integration without XML?

Like the XML version, you need the Maven dep:

<!-- Provides the org.cloudfoundry.runtime.service factories used below -->
<dependency> 
<groupId>org.cloudfoundry</groupId> 
<artifactId>cloudfoundry-runtime</artifactId> 
<version>0.8.1</version> 
</dependency>

The package org.cloudfoundry.runtime.service offers 4 implementations of AbstractCloudServiceFactory:
- CloudDataSourceFactory
- CloudMongoDbFactoryBean
- CloudRabbitConnectionFactoryBean
- CloudRedisConnectionFactoryBean

So, depending on the type of your service, choose the right implementation and create a configuration class. Here is an example using Redis:


import javax.inject.Inject;

import org.cloudfoundry.runtime.service.CloudPoolConfiguration;
import org.cloudfoundry.runtime.service.keyvalue.CloudRedisConnectionFactoryBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Profile;
import org.springframework.core.annotation.Order;
import org.springframework.core.env.Environment;
import org.springframework.data.redis.connection.RedisConnectionFactory;
import org.springframework.data.redis.connection.jedis.JedisConnectionFactory;
import org.springframework.data.redis.core.StringRedisTemplate;


/**
 * Redis configuration without the Cloud Foundry XML namespace: the
 * {@link RedisConnectionFactory} comes either from environment properties or
 * from the Cloud Foundry runtime, depending on the active Spring profile.
 */
@Configuration
@ComponentScan(basePackages = { "..." })
@Order(1)
public class KeyValueConfig {

    /** Provided by whichever nested configuration matches the active profile. */
    @Inject
    private RedisConnectionFactory redisConnectionFactory;

    /** @return a {@link StringRedisTemplate} bound to the active connection factory */
    @Bean
    public StringRedisTemplate redisTemplate() {
        return new StringRedisTemplate(redisConnectionFactory);
    }

    /**
     * Connection factory for the local, test and prod profiles, configured
     * from the {@code redis.hostname}, {@code redis.port} and
     * {@code redis.password} environment properties.
     */
    @Configuration
    @Profile({ Profiles.LOCAL, Profiles.TEST, Profiles.PROD })
    static class Default {

        @Inject
        private Environment environment;

        /**
         * @return a pooled Jedis connection factory
         * @throws IllegalStateException if {@code redis.port} is not defined
         */
        @Bean
        public RedisConnectionFactory redisConnectionFactory() {
            JedisConnectionFactory redis = new JedisConnectionFactory();
            redis.setHostName(environment.getProperty("redis.hostname"));
            // getRequiredProperty fails with an explicit message when the port
            // is missing, instead of a NullPointerException on unboxing.
            redis.setPort(environment.getRequiredProperty("redis.port",
                    Integer.class));
            redis.setPassword(environment.getProperty("redis.password"));
            redis.setUsePool(true);
            return redis;
        }
    }

    /**
     * Connection factory for the Cloud Foundry profile: credentials and host
     * are resolved by the Cloud Foundry runtime, not by properties.
     */
    @Configuration
    @Profile(Profiles.CLOUDFOUNDRY)
    static class Cloud {

        /**
         * @return the connection factory built by the Cloud Foundry runtime,
         *         with a pool size of "3-5"
         * @throws Exception if the factory bean cannot create the connection factory
         */
        @Bean
        public RedisConnectionFactory redisConnectionFactory() throws Exception {
            CloudPoolConfiguration cloudPoolConfiguration = new CloudPoolConfiguration();
            cloudPoolConfiguration.setPoolSize("3-5");

            CloudRedisConnectionFactoryBean factory = new CloudRedisConnectionFactoryBean();
            factory.setCloudPoolConfiguration(cloudPoolConfiguration);
            // The factory bean is created by hand, so the container never runs
            // its initialization callback; trigger it before asking for the
            // object. NOTE(review): assumes the factory extends Spring's
            // AbstractFactoryBean - confirm against cloudfoundry-runtime 0.8.1.
            factory.afterPropertiesSet();

            return factory.getObject();
        }
    }

}

And that's all!

Saturday, March 24, 2012

Automatic retry on failure


What's the need?
To make processing more robust and less prone to failure, sometimes it helps to automatically retry a failed operation in case it might succeed on a subsequent attempt. For example a call to a web service that fails because of a network glitch or a DeadLockLoserException in a database update may resolve themselves after a short wait. 
So, how to...?
To automate the retry of such operations, the Spring team developed a retry module in Spring Batch, under the package org.springframework.batch.retry. 
From the doc: Client code is not aware of the details of when and how many times to retry the operation, and various strategies for those details are available. The decision about whether to retry or abandon lies with the Framework, but is parameterisable through some retry meta data. Retryable operations are usually transactional, but this can be provided by a normal transaction template or interceptor (transaction meta data are independent of the retry meta data).
For some months now, these classes have been available in a dedicated project on GitHub (spring-retry).
So, to begin, add this dependency in your pom.xml: 

<!-- Standalone retry module (RetryTemplate, RetryPolicy, RetryCallback) -->
<dependency>
<groupId>org.springframework.retry</groupId>
<artifactId>spring-retry</artifactId>
<version>1.0.0.RELEASE</version>
</dependency>



Then, if you want to handle exceptions, you just have to use the RetryTemplate:
RetryTemplate template = new RetryTemplate();
// The timeout is configured through the setter, which TimeoutRetryPolicy
// exposes alongside its no-arg constructor: keep retrying for up to 30 seconds.
TimeoutRetryPolicy timeoutPolicy = new TimeoutRetryPolicy();
timeoutPolicy.setTimeout(30000L);
template.setRetryPolicy(timeoutPolicy);
Foo result = template.execute(new RetryCallback<Foo>() {
    public Foo doWithRetry(RetryContext context) {
        // Do stuff that might fail, e.g. webservice operation.
        // fooService.fetchFoo() is a placeholder for your own call.
        return fooService.fetchFoo();
    }
});
Inside a RetryTemplate the decision to retry or fail in the execute method is determined by a RetryPolicy:
// Retry policy limited to 3 attempts; the map marks SocialException as retryable (true).
RetryPolicy retryPolicy = new SimpleRetryPolicy(3, Collections.<Class<? extends Throwable>, Boolean> singletonMap(SocialException.class, true));
// retryTemplate is assumed to be declared elsewhere (e.g. as a field).
retryTemplate = new RetryTemplate();
retryTemplate.setRetryPolicy(retryPolicy);


In this case, an operation failing with a SocialException (or any exception extending it) will be attempted 3 times at most, i.e. the initial call plus up to 2 retries.
Enjoy!
Source:


Thursday, March 15, 2012

Using the new Spring Cache Abstraction in Spring Security


What's the need?

Caching is handled by storing the UserDetails object being placed in the UserCache. This ensures that subsequent requests with the same username can be validated without needing to query the UserDetailsService. It should be noted that if a user appears to present an incorrect password, the UserDetailsService will be queried to confirm the most up-to-date password was used for comparison. 

Caching is only likely to be required for stateless applications. In a normal web application, for example, the SecurityContext is stored in the user's session and the user isn't reauthenticated on each request. 

The default cache implementation is therefore NullUserCache. Another implementation uses EhCache directly, through the EhCache CacheManager. But why not use the new Spring Cache Abstraction, available since Spring 3.1?

The solution

Here is a solution implementing the UserCache interface: 

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.cache.Cache;
import org.springframework.cache.Cache.ValueWrapper;
import org.springframework.security.core.userdetails.UserCache;
import org.springframework.security.core.userdetails.UserDetails;
import org.springframework.util.Assert;

/**
 * {@link UserCache} implementation storing {@link UserDetails} in a Spring
 * {@link Cache}. Entries are keyed by the lower-cased username, so lookups are
 * case-insensitive.
 */
public class AdaptableUserCache implements UserCache, InitializingBean {

    private static final Logger LOGGER = LoggerFactory.getLogger(AdaptableUserCache.class);

    /** Backing Spring cache; checked for null in {@link #afterPropertiesSet()}. */
    private final Cache cache;

    /**
     * @param cache the Spring cache that will hold the user details
     */
    public AdaptableUserCache(Cache cache) {
        this.cache = cache;
    }

    /**
     * Verifies that a backing cache was supplied.
     *
     * @throws Exception if the cache is {@code null}
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.notNull(cache, "cache mandatory");
    }

    /**
     * @param username the username to look up (case-insensitive)
     * @return the cached user, or {@code null} when there is no entry
     */
    @Override
    public UserDetails getUserFromCache(String username) {
        final String key = cacheKey(username);
        final ValueWrapper element = cache.get(key);

        LOGGER.debug("Cache hit: {}; username: {}", element != null, key);

        return element == null ? null : (UserDetails) element.get();
    }

    /**
     * Stores the user under its lower-cased username.
     *
     * @param user the user details to cache
     */
    @Override
    public void putUserInCache(UserDetails user) {
        final String key = cacheKey(user.getUsername());
        LOGGER.debug("Cache put: {}", key);

        cache.put(key, user);
    }

    /**
     * Evicts the entry of the given user, if any.
     *
     * @param username the username to evict (case-insensitive)
     */
    @Override
    public void removeUserFromCache(String username) {
        final String key = cacheKey(username);
        LOGGER.debug("Cache remove: {}", key);

        cache.evict(key);
    }

    /**
     * Normalizes a username into a cache key. Locale.ROOT keeps the key
     * independent of the JVM default locale (e.g. the Turkish dotless i).
     */
    private static String cacheKey(String username) {
        return username.toLowerCase(java.util.Locale.ROOT);
    }
}

Then edit your SecurityConfig class in this way:

/**
 * Security configuration wiring an {@link AdaptableUserCache}, backed by the
 * Spring cache named "users", into the injected authentication provider.
 */
@Configuration
@ImportResource("classpath:spring/security.xml")
@ComponentScan(basePackages = "...")
public class SecurityConfig {

// Provider that receives the user cache in userCache() below.
@Inject
private DaoAuthenticationProvider authenticationProvider;

// Source of the "users" cache.
@Inject
private CacheManager cacheManager;
  ...
/**
 * Creates the user cache from the "users" cache and, as a side effect,
 * registers it on the injected authentication provider.
 */
@Bean
public UserCache userCache() throws Exception {
AdaptableUserCache userCache = new AdaptableUserCache(cacheManager.getCache("users"));
authenticationProvider.setUserCache(userCache);
return userCache;
}
}

Well done!