Solr5 DataImport 处理1对多关系

“问题” 和“派发处理” 是2个对象。 每个问题可以被派发N次, 最后结束问题。

实际查询中数据量可能很大,同时既可以从问题查询派发部门,也可以从派发情况查询问题(如本部门处理的问题)。

使用的技巧是:增加 doc_id、doc_type 两个字段,从而在同一个索引中实现多 Entity 查询。

schema.xml

  1 <?xml version="1.0" encoding="UTF-8" ?>
  2 <schema name="uum" version="1.2">
  3   <types>
  4     <fieldType name="boolean" class="solr.BoolField"/>
  5     <fieldType name="date" class="solr.TrieDateField"/>
  6     <fieldType name="float" class="solr.TrieFloatField"/>
  7     <fieldType name="int" class="solr.TrieIntField"/>
  8     <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
  9     <fieldType name="string" class="solr.StrField"/>
 10     <fieldType name="url" class="solr.StrField" indexed="false" stored="true" />
 11 
 12     <fieldType name="simpletext" 
 13                class="solr.TextField" 
 14                positionIncrementGap="100">
 15       <analyzer>
 16         <tokenizer class="solr.StandardTokenizerFactory"/>
 17         <filter class="solr.LowerCaseFilterFactory"/>
 18       </analyzer>
 19     </fieldType>
 20 
 21     <fieldType name="ignored" class="solr.StrField" 
 22                indexed="false" stored="false" />
 23                
 24    <fieldType name="date_l" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
 25                
 26   </types>
 27     
 28   <fields>
 29 
 30     <!--
 31        FIELDS THAT ARE IN DOCS OF MULTIPLE TYPES
 32       -->
 33 
 34     <!-- this will be our uniqueKey, so it has to be distinct across
 35          all types of documents
 36       -->
 37     <field name="doc_id" type="string" />
 38 
 39     <!-- the type (or domain) of our document -->
 40     <field name="doc_type" type="string" />
 41 
 42     <!-- external URLs -->
 43     <dynamicField name="*_url" type="url" multiValued="false" />
 44     <dynamicField name="*_urls" type="url" multiValued="true"/>
 45 
 46     <!-- dates -->
 47     <dynamicField name="*_dt" type="date" />
 48 
 49     <!-- numeric values that might come in hand for relevancy biasing
 50          (they all relate to popularity)
 51       -->
 52     <dynamicField name="*_count" type="int" multiValued="false"/>
 53 
 54     <field name="_version_" type="long" indexed="true" stored="true"/>
 55     <field name="_root_" type="string" indexed="true" stored="false"/>
 56     <!-- Field used by Suggester for autocompletion -->
 57     <field name="autocomplete" 
 58            type="simpletext" 
 59            stored="false" 
 60            multiValued="true" />
 61 
 62     <!-- quick search field -->
 63     <field name="catchall" 
 64            type="simpletext" 
 65            stored="false"
 66            omitNorms="true"
 67            multiValued="true" />
 68     <field name="ID" type="string" multiValued="false"/>
 69     
 70     <!-- 
 71        PETITION
 72       -->
 73     <field name="TenantId"  type="string" multiValued="false"/>
 74     <field name="PetitionId"  type="string" multiValued="false"/>
 75     <field name="PetitionNumber"  type="string" multiValued="false"/>
 76     <field name="Title"  type="simpletext" multiValued="false"/>
 77     <field name="Content"  type="simpletext" multiValued="false"/>
 78     <field name="Tel"  type="string" multiValued="false"/>
 79     <field name="EventAddress"  type="simpletext" multiValued="false"/>
 80     <field name="DutyGridName"  type="string" multiValued="false"/>
 81     <field name="ComplaintType"  type="string" multiValued="false"/>
 82     <field name="IsVoid"  type="boolean" multiValued="false"/>
 83     <field name="IsEnd"  type="boolean" multiValued="false"/>
 84     <field name="GridAddress"  type="simpletext" multiValued="false"/>
 85     <field name="CategoryName"  type="string" multiValued="false"/>
 86     <field name="Category"  type="string" multiValued="false"/>
 87     
 88     <field name="Status"  type="string" multiValued="false"/>
 89     <field name="RegisterOn"  type="date" multiValued="false"/>
 90     <field name="DeadLine"  type="date" multiValued="false"/>
 91     <field name="ReportOn"  type="date" multiValued="false"/>
 92     <field name="EndCaseOn"  type="date" multiValued="false"/>
 93     <field name="CreatedBy"  type="string" multiValued="false"/>
 94     <field name="SourceWay"  type="string" multiValued="false"/>
 95     <field name="ISWGXTSB"  type="string" multiValued="false"/>
 96     <field name="RegisterOffice"  type="string" multiValued="false"/>
 97     <field name="EventLevel"  type="string" multiValued="false"/>
 98     <field name="ImportantLevel"  type="string" multiValued="false"/>
 99     
100         <!--
101                 PETITION/DISPATCH
102         -->
103           <field name="DispatchOffices" type="string" multiValued="true"/>
104           <!--<field name="DispatchOfficeNames" />-->
105           <field name="ReceiveOffices"  type="string" multiValued="true"/>
106           <field name="ReceiveOfficeNames"  type="string" multiValued="true"/>
107           
108         
109         <!--
110                 PETITION/PARTICIPANT
111         -->
112           <field name="OrgUnits" type="string" multiValued="true"/>
113           <field name="Participants"  type="string" multiValued="true"/>
114 
115     <!-- 
116        DISPATCH
117       -->
118 
119       <field name="Dispatcher" type="string" multiValued="false"/>
120       <field name="DispatchOn" type="date" multiValued="false"/>
121       <field name="DispatchOffice" type="string" multiValued="false"/>
122       <field name="DispatchOfficeName" type="string" multiValued="false"/>
123       <field name="ReceiveOffice" type="string" multiValued="false"/>
124       <field name="ReceiveOfficeName" type="string" multiValued="false"/>
125       <field name="StartOn"  type="date" multiValued="false"/>
126       <field name="DealWay"  type="string" multiValued="false"/>
127       <field name="FeedBackType"  type="string" multiValued="false"/>
128       <field name="FeedBackPeople"  type="string" multiValued="false"/>
129       <field name="FeedBackOn"   type="date" multiValued="false"/>
130       <field name="FeedBackMsg"   type="simpletext" multiValued="false"/>
131       <field name="NoPublicOpinion"   type="simpletext" multiValued="false"/>
132       <field name="IsPublic"  type="boolean" multiValued="false"/>
133       <field name="IsAlreadyReply"  type="boolean" multiValued="false"/>
134       <field name="IsAlreadyContact"  type="boolean" multiValued="false"/>
135 
136   </fields>
137 
138 
139   <!-- copy author names and title titles to a field to autocomplete
140   <copyField source="canonical_name" dest="autocomplete"/>
141   <copyField source="title" dest="autocomplete"/> -->
142   
143   <!-- copy everything into one big field for easy searching -->
144   <copyField source="PetitionNumber" dest="catchall"/>
145   <copyField source="Title" dest="catchall"/>
146   <copyField source="Content" dest="catchall"/>
147   <copyField source="Tel" dest="catchall"/>
148   <copyField source="EventAddress" dest="catchall"/>
149 
150   <!-- A unique Key field isn't neccessary, but it's the only way Solr -->
151   <!-- can automaticly replace docs when they change -->
152   <!-- DataImportHandler is also very unhappy if you don't have one -->
153   <uniqueKey>doc_id</uniqueKey>
154 
155   <!-- It's a *very* good idea to have a default search field -->
156   <defaultSearchField>catchall</defaultSearchField>
157 
158 </schema>
schema.xml

db-data-config.xml

  1 <dataConfig>
  2     <dataSource type="JdbcDataSource"
  3                 driver="oracle.jdbc.driver.OracleDriver"
  4                 url="jdbc:oracle:thin:@192.168.0.0:1521:test" 
  5                 user="user" 
  6                 password="pwd"
  7                 />
  8     <document>
  9         <entity name="petition" 
 10                 pk="ID"
 11                 transformer="TemplateTransformer"
 12                 query="select * from VW_HIS_EventInfo "
 13                 deltaImportQuery="select t.* from VW_HIS_EventInfo t where t.id='${dataimporter.delta.ID}'"
 14                 deltaQuery="select t.* from VW_HIS_EventInfo t where  
 15                 RegistorOn> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
 16                 or ReportOn> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
 17                 or endcaseon> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT') ">
 18                 >
 19           <field  column="doc_id" template="PE_${petition.ID}" />
 20           <field  column="doc_type"  template="PE" />
 21 
 22           <!--<field  column="LATLON" name="LatLon_p"/>-->
 23           <field  column="TENANTID" name="TenantId" />
 24           <field  column="ID" name="PetitionId" />
 25           <field  column="PETITIONNUMBER" name="PetitionNumber" />
 26           <field  column="TITLE" name="Title" />
 27           <field  column="CONTENT" name="Content" />
 28           <field  column="TEL" name="Tel" />
 29           <field  column="EVENTADDRESS" name="EventAddress" />
 30           <!--<field  column="AREANAME" name="AreaName" />-->
 31           <field  column="DUTYGRIDNAME" name="DutyGridName" />
 32           <field  column="GRIDADDRESS" name="GridAddress" />
 33           <field  column="COMPLAINTQUALITYNAME" name="ComplaintType" />
 34           <field  column="ISVOID" name="IsVoid" />
 35           <field  column="ISEND" name="IsEnd" />
 36           <field  column="CATEGORYNAME" name="CategoryName" />
 37           <field  column="CATEGORYCODE" name="Category" />
 38           <field  column="STATUS" name="Status" />
 39           <field  column="REGISTORON" name="RegisterOn" />
 40           <field  column="DEADLINE" name="DeadLine" />
 41           <field  column="CREATEDBY" name="CreatedBy" />
 42           <field  column="REPORTON" name="ReportOn" />
 43           <field  column="SOURCEWAY" name="SourceWay" />
 44           <field  column="ISWGXTSB" name="ISWGXTSB" />
 45           <field  column="REGISTOROFFICE" name="RegisterOffice" />
 46           <!--<field  column="TOOFFICENAME" name="ToOfficeName" />-->
 47           <field  column="EVENTLEVEL" name="EventLevel" />
 48           <field  column="IMPORTANTLEVEL" name="ImportantLevel" />
 49           <field  column="ENDCASEON" name="EndCaseOn" />
 50           <!--<field  column="ENDOPINION" name="EndOpinion" />-->
 51           
 52             <entity name="petition_dispatch"
 53                   pk="ID"
 54                   query="select * from VW_HIS_DispatchInfo where petitionid='${petition.ID}'"
 55                   deltaQuery="select ID from VW_HIS_DispatchInfo where 
 56                             DISPATCHON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
 57                         or  FEEDBACKON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
 58                   parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petition_dispatch.PETITIONID}'">
 59                   <!--<field  column="DISPATCHER" name="Dispatchers" />-->
 60                  
 61                   <field  column="DISPATCHOFFICE" name="DispatchOffices" />
 62                   <!--<field  column="DISPATCHOFFICENAME" name="DispatchOfficeNames" />-->
 63                   <field  column="RECEIVEOFFICE" name="ReceiveOffices" />
 64                   <field  column="RECEIVEOFFICENAME" name="ReceiveOfficeNames" />
 65             </entity>
 66             
 67             <entity name="petiton_participant"
 68                   pk="PARTICIPANT"
 69                   query="select distinct ORGUNIT,PARTICIPANT from vw_his_participant where petitionid='${petition.ID}'"
 70                   deltaQuery="select PARTICIPANT from vw_his_participant where HandleOn > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT') "
 71                   parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petiton_participant.PETITIONID}'">
 72                     <!--<field column="HANDLEON" name="HandleOns"/>-->
 73                     <field column="ORGUNIT" name="OrgUnits"/>
 74                     <!--<field column="PARTICIPANTNAME" name="ParticipantNames"/>-->
 75                     <field column="PARTICIPANT" name="Participants"/>
 76                     <!--<field column="PARTICIPANTTYPE" name="ParticipantTypes"/>-->
 77             </entity>
 78                   
 79         </entity>
 80         
 81         <entity name="dispatch"
 82               pk="ID"
 83               transformer="TemplateTransformer"
 84               query="select * from VW_HIS_DispatchInfo"
 85               deltaImportQuery="select t.* from VW_HIS_DispatchInfo t where t.id='${dataimporter.delta.ID}'"
 86               deltaQuery="select ID from VW_HIS_DispatchInfo where DISPATCHON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
 87                     or  FEEDBACKON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">
 88             
 89             <field  column="doc_id" template="DIS_${dispatch.ID}" />
 90             <field  column="doc_type" template="DIS" />
 91             
 92             <field  column="TENANTID" name="TenantId" />
 93             <field  column="PETITIONID" name="PetitionId" />
 94               <field  column="DISPATCHER" name="Dispatcher" />
 95               <field  column="DISPATCHON" name="DispatchOn" />
 96               <field  column="DISPATCHOFFICE" name="DispatchOffice" />
 97               <field  column="DISPATCHOFFICENAME" name="DispatchOfficeName" />
 98               <field  column="RECEIVEOFFICE" name="ReceiveOffice" />
 99               <field  column="RECEIVEOFFICENAME" name="ReceiveOfficeName" />
100               <field  column="STARTON" name="StartOn" />
101               <field  column="DEADLINE" name="DeadLine" />
102               <field  column="DEALWAY" name="DealWay" />
103               <field  column="STATUS" name="Status" />
104               <field  column="FEEDBACKTYPE" name="FeedBackType" />
105               <field  column="FEEDBACKPEOPLE" name="FeedBackPeople" />
106               <field  column="FEEDBACKON" name="FeedBackOn" />
107               <field  column="FEEDBACKMSG" name="FeedBackMsg" />
108               <field  column="NOPUBLICOPINION" name="NoPublicOpinion" />
109               <field  column="ISPUBLIC" name="IsPublic" />
110               <field  column="ISALREADYREPLY" name="IsAlreadyReply" />
111               <field  column="ISALREADYCONCAT" name="IsAlreadyContact" />
112               
113               <entity name="dispatch_petition"
114                   pk="ID"
115                   query="select * from VW_HIS_EventInfo where id='${dispatch.PETITIONID}'"
116                   deltaQuery="select ID from VW_HIS_EventInfo where endcaseon> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
117                   parentDeltaQuery="select ID from VW_HIS_DispatchInfo where PETITIONID='${dispatch_petition.ID}'">
118                   <field  column="PETITIONNUMBER" name="PetitionNumber" />
119                   <field  column="TITLE" name="Title" />
120                   <field  column="CONTENT" name="Content" />
121                   <field  column="TEL" name="Tel" />
122                   <field  column="EVENTADDRESS" name="EventAddress" />
123                   <!--<field  column="AREANAME" name="AreaName" />-->
124                   <field  column="DUTYGRIDNAME" name="DutyGridName" />
125                   <field  column="GRIDADDRESS" name="GridAddress" />
126                   <field  column="COMPLAINTQUALITYNAME" name="ComplaintType" />
127                   <field  column="CATEGORYNAME" name="CategoryName" />
128                   <field  column="CATEGORYCODE" name="Category" />
129             </entity>
130         </entity>
131           
132      </document>
133 </dataConfig>
db-data-config.xml
原文地址:https://www.cnblogs.com/crabo/p/solr_dih_multi_entity.html