
ES-Hadoop Study Notes: Storm Integration


elasticsearch-hadoop provides integration support between Elasticsearch and Apache Storm. Data read from Elasticsearch is handled inside Storm in the form of Tuples.

Dependency versions:

<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>1.0.1</version>
</dependency>

<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-starter</artifactId>
    <version>1.0.1</version>
</dependency>

<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-hdfs</artifactId>
    <version>1.0.1</version>
</dependency>

<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-kafka</artifactId>
    <version>1.0.1</version>
</dependency>

<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.10</artifactId>
    <version>0.10.0.0</version>
</dependency>

<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch-hadoop</artifactId>
    <version>2.3.2</version>
</dependency>


The corresponding JAR packages are placed under Storm's extlib directory.
The HandleBolt below reads each field from the incoming tuple, replaces missing or "null" values with "NA", and re-emits the cleaned tuple:
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class HandleBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;

    private OutputCollector collector = null;

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        // Pull each expected field; absent or literal "null" values become "NA".
        String name = "NA";
        if (input.contains("name")) {
            name = input.getStringByField("name");
        }
        String phone = "NA";
        if (input.contains("phone")) {
            phone = input.getStringByField("phone");
        }
        String rcall = "NA";
        if (input.contains("rcall")) {
            rcall = input.getStringByField("rcall");
            rcall = null == rcall || "null".equals(rcall) ? "NA" : rcall;
        }
        String address = "NA";
        if (input.contains("address")) {
            address = input.getStringByField("address");
            address = null == address || "null".equals(address) ? "NA" : address;
        }
        String email = "NA";
        if (input.contains("email")) {
            email = input.getStringByField("email");
            email = null == email || "null".equals(email) ? "NA" : email;
        }
        String idCard = "NA";
        if (input.contains("idCard")) {
            idCard = input.getStringByField("idCard");
            idCard = null == idCard || "null".equals(idCard) ? "NA" : idCard;
        }
        this.collector.emit(new Values(name, phone, rcall, address, email, idCard));
        this.collector.ack(input);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declared field names must line up with the Values emitted above.
        declarer.declare(new Fields("name", "phone", "rcall", "address", "email", "idCard"));
    }

}

 

The ES2StormTopology below wires the ES spout to a PrinterBolt, the HandleBolt, and an HdfsBolt that writes the handled records to HDFS:
import java.util.HashMap;
import java.util.Map;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.RecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy.TimeUnit;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;
import org.apache.storm.starter.bolt.PrinterBolt;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.utils.Utils;
import org.platform.storm.elasticsearch.spout.ESSpout;

public class ES2StormTopology {

    private static final String TOPOLOGY_NAME = "es-storm-topology";

    public static void main(String[] args) {
        if (args.length != 1) {
            System.exit(0);
        }
        boolean isCluster = Boolean.parseBoolean(args[0]);

        TopologyBuilder builder = new TopologyBuilder();
        // Read all documents from the operator/telecom index.
        String target = "operator/telecom";
        String query = "?q=*";
        Map<Object, Object> configuration = new HashMap<Object, Object>();
        configuration.put("es.nodes", "192.168.10.20:9200");
        configuration.put("es.read.field.include", "name,phone,rcall,email,idCard,zipCode,address");
        configuration.put("es.storm.spout.fields", "name,phone,rcall,email,idCard,zipCode,address");
        builder.setSpout("es-storm-spout", new ESSpout(target, query, configuration), 1);

        builder.setBolt("storm-print-bolt", new PrinterBolt()).shuffleGrouping("es-storm-spout");

        builder.setBolt("storm-handle-bolt", new HandleBolt()).shuffleGrouping("es-storm-spout");

        // Write the handled tuples to HDFS as colon-delimited records,
        // rotating files every minute and syncing every 10 tuples.
        RecordFormat recordFormat = new DelimitedRecordFormat().withFieldDelimiter(":");
        SyncPolicy syncPolicy = new CountSyncPolicy(10);
        FileRotationPolicy fileRotationPolicy = new TimedRotationPolicy(1.0f, TimeUnit.MINUTES);
        FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath("/storm/")
                .withPrefix("es_").withExtension(".log");
        HdfsBolt hdfsBolt = new HdfsBolt().withFsUrl("hdfs://centos.host1:9000")
                .withFileNameFormat(fileNameFormat).withRecordFormat(recordFormat)
                .withRotationPolicy(fileRotationPolicy).withSyncPolicy(syncPolicy);
        builder.setBolt("storm-hdfs-bolt", hdfsBolt).globalGrouping("storm-handle-bolt");

        Config config = new Config();
        config.setDebug(true);
        if (isCluster) {
            try {
                config.setNumWorkers(3);
                StormSubmitter.submitTopologyWithProgressBar(
                        TOPOLOGY_NAME, config, builder.createTopology());
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
            Utils.sleep(100000);
            cluster.killTopology(TOPOLOGY_NAME);
            cluster.shutdown();
        }

    }

}

 

Note: the EsSpout class shipped with elasticsearch-hadoop is built against an older Storm version, so an ESSpout was rewritten against the newer Storm API to replace it.
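The rewritten ESSpout itself is not included in the post. The skeleton below is only a rough sketch, showing the shape of such a spout against the org.apache.storm packages used by Storm 1.x: the constructor matches the way the topologies here create it, while the actual scroll reading done through elasticsearch-hadoop is only indicated by comments, not reproduced.

import java.util.Map;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;

public class ESSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1L;

    private final String target;                     // index/type to read, e.g. "operator/telecom"
    private final String query;                      // query string, e.g. "?q=*"
    private final Map<Object, Object> configuration; // es.nodes, es.read.field.include, es.storm.spout.fields ...

    private SpoutOutputCollector collector;

    public ESSpout(String target, String query, Map<Object, Object> configuration) {
        this.target = target;
        this.query = query;
        this.configuration = configuration;
    }

    @SuppressWarnings("rawtypes")
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        // Hypothetical: open elasticsearch-hadoop's scroll reader against es.nodes
        // for the given target and query; this logic is carried over from EsSpout.
    }

    @Override
    public void nextTuple() {
        // Hypothetical: fetch the next document from the scroll and emit it, e.g.
        // this.collector.emit(new Values(name, phone, ...), documentId);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declare exactly the fields configured via es.storm.spout.fields,
        // which the topologies in this post always set.
        String fields = (String) configuration.get("es.storm.spout.fields");
        declarer.declare(new Fields(fields.split(",")));
    }
}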

 

$ bin/storm jar /home/hadoop/Documents/esstorm-0.0.1-SNAPSHOT.jar org.platform.storm.elasticsearch.ES2StormTopology false

The Storm2ESTopology below reads documents with the same ES spout and writes them back into another index through ESBolt:
import java.util.HashMap;
import java.util.Map;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.starter.bolt.PrinterBolt;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.utils.Utils;
import org.platform.storm.elasticsearch.bolt.ESBolt;
import org.platform.storm.elasticsearch.spout.ESSpout;

public class Storm2ESTopology {

    private static final String TOPOLOGY_NAME = "storm-es-topology";

    public static void main(String[] args) {
        if (args.length != 1) {
            System.exit(0);
        }
        boolean isCluster = Boolean.parseBoolean(args[0]);

        TopologyBuilder builder = new TopologyBuilder();

        // Read documents from the operator/telecom index.
        String target = "operator/telecom";
        String query = "?q=*";
        Map<Object, Object> spoutConf = new HashMap<Object, Object>();
        spoutConf.put("es.nodes", "192.168.10.20:9200");
        spoutConf.put("es.read.field.include", "name,phone,rcall,email,idCard,zipCode,address");
        spoutConf.put("es.storm.spout.fields", "name,phone,rcall,email,idCard,zipCode,address");
        builder.setSpout("es-storm-spout", new ESSpout(target, query, spoutConf), 1);

        builder.setBolt("storm-print-bolt", new PrinterBolt()).shuffleGrouping("es-storm-spout");

        // Write the tuples into the data/telecom index.
        Map<Object, Object> boltConf = new HashMap<Object, Object>();
        boltConf.put("es.nodes", "192.168.10.20:9200");
        boltConf.put("es.index.auto.create", "true");
        boltConf.put("es.ser.writer.bytes.class", "org.platform.storm.elasticsearch.bolt.StormTupleBytesConverter");
        //boltConf.put("es.input.json", "true");
        builder.setBolt("storm-es-bolt", new ESBolt("data/telecom", boltConf))
            .globalGrouping("es-storm-spout");

        Config config = new Config();
        config.setDebug(true);
        if (isCluster) {
            try {
                config.setNumWorkers(3);
                StormSubmitter.submitTopologyWithProgressBar(
                        TOPOLOGY_NAME, config, builder.createTopology());
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
            Utils.sleep(100000);
            cluster.killTopology(TOPOLOGY_NAME);
            cluster.shutdown();
        }

    }

}

 

 

Note: the EsBolt and StormTupleBytesConverter classes shipped with elasticsearch-hadoop are likewise built against an older Storm version, so ESBolt and StormTupleBytesConverter were rewritten against the newer Storm API to replace them.
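The rewritten ESBolt is also not included in the post. As another assumption-laden sketch, the skeleton below mirrors the two-argument constructor used in the topology above and marks the elasticsearch-hadoop bulk-writing internals as comments rather than guessing at their API.

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

public class ESBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;

    private final String target;                     // index/type to write, e.g. "data/telecom"
    private final Map<Object, Object> configuration; // es.nodes, es.index.auto.create, es.ser.writer.bytes.class ...

    private OutputCollector collector;

    public ESBolt(String target, Map<Object, Object> configuration) {
        this.target = target;
        this.configuration = configuration;
    }

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        // Hypothetical: create elasticsearch-hadoop's bulk writer for the target index,
        // wired to the tuple converter named by es.ser.writer.bytes.class.
    }

    @Override
    public void execute(Tuple input) {
        // Hypothetical: hand the tuple to the bulk writer, then ack on success
        // (or fail on error) so the spout can track delivery.
        this.collector.ack(input);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal writer bolt: nothing is emitted downstream.
    }
}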

 

$ bin/storm jar /home/hadoop/Documents/esstorm-0.0.1-SNAPSHOT.jar org.platform.storm.elasticsearch.Storm2ESTopology false

 

Source: http://blog.csdn.net/fighting_one_piece/article/details/52228641
