Commit 865bf7cc by mahaisong

fix:按年查

parent be7c92c7
 
...@@ -157,9 +157,7 @@ namespace TaikorES_ImportLocalES ...@@ -157,9 +157,7 @@ namespace TaikorES_ImportLocalES
DateTime startStockKeepTime = DateTime.Now; DateTime startStockKeepTime = DateTime.Now;
try try
{ {
//载入文件 //载入文件
string filepath = Environment.CurrentDirectory + "\\MatchCrawlID.csv"; string filepath = Environment.CurrentDirectory + "\\MatchCrawlID.csv";
try try
...@@ -174,6 +172,14 @@ namespace TaikorES_ImportLocalES ...@@ -174,6 +172,14 @@ namespace TaikorES_ImportLocalES
StreamReader sr = new StreamReader(filepath, Encoding.Default); StreamReader sr = new StreamReader(filepath, Encoding.Default);
String line; String line;
line = sr.ReadLine();
//标题
if (!line.Contains("CrawlID"))
{
MatchCrawlID.TryAdd(ADD(), line.ToString().Trim().Replace("\r", string.Empty).Replace("\n", string.Empty));
}
while ((line = sr.ReadLine()) != null) while ((line = sr.ReadLine()) != null)
{ {
MatchCrawlID.TryAdd(ADD(), line.ToString().Trim().Replace("\r", string.Empty).Replace("\n", string.Empty)); MatchCrawlID.TryAdd(ADD(), line.ToString().Trim().Replace("\r", string.Empty).Replace("\n", string.Empty));
...@@ -216,11 +222,23 @@ namespace TaikorES_ImportLocalES ...@@ -216,11 +222,23 @@ namespace TaikorES_ImportLocalES
//查询: 开始时间 starttime endtime nextstarttime nextendtime //查询: 开始时间 starttime endtime nextstarttime nextendtime
//如果小于1W条,则继续按照下一个时间查询(nextstarttime,nextendtime) //如果小于1W条,则继续按照下一个时间查询(nextstarttime,nextendtime)
//如果大于等于1W条,则下一个时间查询的起始时间是 本地的最后一条(第1W条)数据的fetchTime(TenThousandTime)。 循环(递进TenThousandTime,nextendtime),直到小于1W条为止,再按照下一个时间查询(nextstarttime,nextendtime //如果大于等于1W条,则下一个时间查询的起始时间是 本地的最后一条(第1W条)数据的fetchTime(TenThousandTime)。 循环(递进TenThousandTime,nextendtime),直到小于1W条为止,再按照下一个时间查询(nextstarttime,nextendtime
//数据评估。大约1个月30W条记录。 平均1天1W条。
//数据密度小的,可以几天一次。
//数据密度大的,1天就好几万。不能扩大数据范围,给ES带来压力。
foreach (var item in MatchCrawlID) foreach (var item in MatchCrawlID)
{ {
this.Invoke(new MethodInvoker(() =>
{
//每个爬虫执行完,都重新输出。
this.listBox1.Items.Clear();
}));
DateTime dtitem = DateTime.Now; DateTime dtitem = DateTime.Now;
long count = 0; long count = 0;
string crawlID = item.Value.ToString(); string crawlID = item.Value.ToString();
...@@ -229,7 +247,8 @@ namespace TaikorES_ImportLocalES ...@@ -229,7 +247,8 @@ namespace TaikorES_ImportLocalES
this.Invoke(new MethodInvoker(() => this.Invoke(new MethodInvoker(() =>
{ {
this.listBox1.Items.Add("开始时间:" + dtitem.ToString("yyyy-MM-dd HH:mm:ss.fff") + "执行完成;列表中第" + item.Key.ToString() + "个爬虫," + item.Value.ToString() ); this.listBox1.Items.Add("开始时间:" + dtitem.ToString("yyyy-MM-dd HH:mm:ss.fff") + "执行开始;列表中第" + item.Key.ToString() + "个爬虫," + item.Value.ToString() );
Logger.Fatal("开始时间:" + dtitem.ToString("yyyy-MM-dd HH:mm:ss.fff") + "执行开始;列表中第" + item.Key.ToString() + "个爬虫," + item.Value.ToString());
})); }));
...@@ -274,7 +293,7 @@ namespace TaikorES_ImportLocalES ...@@ -274,7 +293,7 @@ namespace TaikorES_ImportLocalES
{ {
try try
{ {
GC.Collect();
if (TodayEndTime > Where_ENDTime) if (TodayEndTime > Where_ENDTime)
{ {
...@@ -297,6 +316,9 @@ namespace TaikorES_ImportLocalES ...@@ -297,6 +316,9 @@ namespace TaikorES_ImportLocalES
//goto //goto
TagToday: TagToday:
GC.Collect();
List<Item> TaiKorNewsList = null; List<Item> TaiKorNewsList = null;
try try
...@@ -368,11 +390,7 @@ namespace TaikorES_ImportLocalES ...@@ -368,11 +390,7 @@ namespace TaikorES_ImportLocalES
{ {
htItemContext.SyncOffset.AddOrUpdate(_syncState); htItemContext.SyncOffset.AddOrUpdate(_syncState);
htItemContext.SaveChanges(); htItemContext.SaveChanges();
} }
string msg = string.Format("Success sync hj beige news at {0}, this time index {1} items.", DateTime.Now, TaiKorNewsList.Count);
Logger.Info(msg);
} }
} }
if (null != TaiKorNewsList) if (null != TaiKorNewsList)
...@@ -382,7 +400,9 @@ namespace TaikorES_ImportLocalES ...@@ -382,7 +400,9 @@ namespace TaikorES_ImportLocalES
try try
{ {
this.listBox1.Items.Add("时间:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff") + " 数据月期:" + TodayQueryDate.ToString("yyyy-MM") + "插入ES" + TaiKorNewsList.Count + "条;当前进度为:" + ((DateTime)(_syncState.last_pubdate)).ToString("yyyy - MM - dd HH: mm:ss.fff")); this.listBox1.Items.Add("时间:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff") + " 数据期:" + TodayQueryDate.ToString("yyyy-MM") + "插入ES" + TaiKorNewsList.Count + "条;当前进度为:" + ((DateTime)(_syncState.last_pubdate)).ToString("yyyy - MM - dd HH: mm:ss.fff"));
Logger.Fatal("时间:" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff") + " 数据期:" + TodayQueryDate.ToString("yyyy-MM") + "插入ES" + TaiKorNewsList.Count + "条;当前进度为:" + ((DateTime)(_syncState.last_pubdate)).ToString("yyyy - MM - dd HH: mm:ss.fff"));
} }
catch { } catch { }
...@@ -442,7 +462,7 @@ namespace TaikorES_ImportLocalES ...@@ -442,7 +462,7 @@ namespace TaikorES_ImportLocalES
} }
} }//end while
} }
catch (Exception ex) catch (Exception ex)
{ {
...@@ -488,7 +508,7 @@ namespace TaikorES_ImportLocalES ...@@ -488,7 +508,7 @@ namespace TaikorES_ImportLocalES
} }
#endregion #endregion
} }//end for 循环下一个爬虫
} }
catch (Exception ex) catch (Exception ex)
{ {
......
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
...@@ -33,6 +33,25 @@ ...@@ -33,6 +33,25 @@
<ErrorReport>prompt</ErrorReport> <ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel> <WarningLevel>4</WarningLevel>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<ItemGroup> <ItemGroup>
<Reference Include="Elasticsearch.Net, Version=2.0.0.0, Culture=neutral, PublicKeyToken=96c599bbe3e70f5d, processorArchitecture=MSIL"> <Reference Include="Elasticsearch.Net, Version=2.0.0.0, Culture=neutral, PublicKeyToken=96c599bbe3e70f5d, processorArchitecture=MSIL">
<HintPath>..\packages\Elasticsearch.Net.2.5.8\lib\net46\Elasticsearch.Net.dll</HintPath> <HintPath>..\packages\Elasticsearch.Net.2.5.8\lib\net46\Elasticsearch.Net.dll</HintPath>
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
<?xml version="1.0" encoding="utf-8"?> ++ /dev/null
<?xml version="1.0" encoding="utf-8"?>
<!--
  Application configuration file. Declares, in order: the Entity Framework
  config section, the supported runtime, application settings, connection
  strings, Entity Framework providers, assembly binding redirects and the
  MySQL ADO.NET provider factory registration.
-->
<configuration>
<configSections>
<!-- Registers the <entityFramework> section handler (EntityFramework 6.0.0.0). -->
<section name="entityFramework" type="System.Data.Entity.Internal.ConfigFile.EntityFrameworkSection, EntityFramework, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" requirePermission="false" />
</configSections>
<startup>
<!-- Targets CLR v4.0 with the .NET Framework 4.6 SKU. -->
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6" />
</startup>
<appSettings>
<add key="webpages:Version" value="3.0.0.0" />
<add key="webpages:Enabled" value="false" />
<add key="ClientValidationEnabled" value="true" />
<add key="UnobtrusiveJavaScriptEnabled" value="true" />
<!-- Path of the log4net configuration file, relative to the application. -->
<add key="log4net.Config" value="Configs\log4net.config" />
<add key="log4net.Config.Watch" value="True" />
<add key="OAuth.Authority" value="https://identity.palaspom.com/id" />
<add key="DAAS.WebAPI" value="https://daas.palaspom.com/" />
<!-- Disabled: crawler Redis endpoint and notification users. -->
<!--<add key="Redis.Host.Crawler" value="mech.palaspom.com" />
<add key="Redis.Port.Crawler" value="11377" />
<add key="Server.Notify.Users" value="Carey" />
-->
<!-- Disabled: address of the Redis database used to fetch repost counts. -->
<!--
<add key="Redis.Host.ItemDup" value="pubtopic.org" />
<add key="Redis.Port.ItemDup" value="11370" />
-->
<!-- Disabled: reader Redis store (local store). -->
<!--
<add key="Redis.Host.Reader" value="mech.palaspom.com" />
<add key="Redis.Port.Reader" value="11389" />
-->
<!-- Disabled: MySQL entity API address and related settings. -->
<!--
<add key="MysqlApiUrl" value="http://internal.palaspom.com/Entity/" />
<add key="APPModel" value="0" />
<add key="Analyzer.Core.KeywordExtracter.RemainPos" value="n" />
<add key="IssueID" value="HuaJin" />-->
</appSettings>
<connectionStrings>
<!-- All entries below were switched to the local "minder" host configuration. -->
<!-- NOTE(review): the two MySQL connection strings embed a user id and password in plain text; consider moving credentials out of source control. TODO confirm with the owner. -->
<!-- Local Minder Elasticsearch (host minder, port 9200, default index palas_test, request timeout 30000). -->
<add name="MinderESDatabase" connectionString="host=minder;port=9200;defaultIndex=palas_test;requesttimeout=30000" />
<!-- Source Elasticsearch: the Taikor production environment (two hosts separated by '|', port 19235, default index palas). -->
<add name="ESDatabase" connectionString="host=mech.palaspom.com|tank.palaspom.com;port=19235;defaultIndex=palas" />
<!-- Both contexts below point at the same Palas_V5 MySQL database on host minder. -->
<add name="PalasV5Context" connectionString="server=minder;user id=palas;password=lapas;persistsecurityinfo=True;database=Palas_V5;Character Set=utf8" providerName="MySql.Data.MySqlClient" />
<add name="PalasEntityContext" connectionString="server=minder;user id=palas;password=lapas;persistsecurityinfo=True;database=Palas_V5;Character Set=utf8" providerName="MySql.Data.MySqlClient" />
</connectionStrings>
<entityFramework>
<!-- Default connection factory is LocalDb ("mssqllocaldb"); the named connection strings above specify the MySQL provider explicitly. -->
<defaultConnectionFactory type="System.Data.Entity.Infrastructure.LocalDbConnectionFactory, EntityFramework">
<parameters>
<parameter value="mssqllocaldb" />
</parameters>
</defaultConnectionFactory>
<!-- Entity Framework provider services registered for MySQL (MySql.Data.Entity.EF6 6.9.12.0) and SQL Server. -->
<providers>
<provider invariantName="MySql.Data.MySqlClient" type="MySql.Data.MySqlClient.MySqlProviderServices, MySql.Data.Entity.EF6, Version=6.9.12.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d"></provider>
<provider invariantName="System.Data.SqlClient" type="System.Data.Entity.SqlServer.SqlProviderServices, EntityFramework.SqlServer" />
</providers>
</entityFramework>
<runtime>
<!-- Binding redirects: Newtonsoft.Json versions up to 11.0.0.0 resolve to 11.0.0.0; MySql.Data versions up to 6.9.12.0 resolve to 6.9.12.0. -->
<assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
<dependentAssembly>
<assemblyIdentity name="Newtonsoft.Json" publicKeyToken="30ad4fe6b2a6aeed" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-11.0.0.0" newVersion="11.0.0.0" />
</dependentAssembly>
<dependentAssembly>
<assemblyIdentity name="MySql.Data" publicKeyToken="c5687fc88969c44d" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-6.9.12.0" newVersion="6.9.12.0" />
</dependentAssembly>
</assemblyBinding>
</runtime>
<system.data>
<!-- Removes any existing MySql.Data.MySqlClient factory registration, then registers the one from MySql.Data 6.9.12.0. -->
<DbProviderFactories>
<remove invariant="MySql.Data.MySqlClient" />
<add name="MySQL Data Provider" invariant="MySql.Data.MySqlClient" description=".Net Framework Data Provider for MySQL" type="MySql.Data.MySqlClient.MySqlClientFactory, MySql.Data, Version=6.9.12.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d" />
</DbProviderFactories>
</system.data>
</configuration>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment