0%

csharp call python

 

環境準備

主要參考這篇 , 不過雷一堆
我自己是用 anaconda 作為 python 環境 , 首先要確認自己是 x86 還是 x64

1
2
import platform
print(platform.architecture())

接著新增一個 net4.8console 然後安裝 pythonnet
然後開啟 configuration manager 假設你是 x64 複製一份給 x64 然後記得要切換到 x64
接著照著以下程式碼設定應該就可以動了
這裡面最雷的部分應該就是要額外設定路徑 , 可以參考 這篇

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
using Python.Runtime;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleAppPy
{
internal class Program
{
static void Main(string[] args)
{
// Modify Path
//string path = @"C:\tools\Anaconda3\envs;" + Environment.GetEnvironmentVariable("PATH", EnvironmentVariableTarget.Machine);

// Set Path
//Environment.SetEnvironmentVariable("PATH", path, EnvironmentVariableTarget.Process);

// Set PythonHome
Environment.SetEnvironmentVariable("PYTHONHOME", @"C:\tools\Anaconda3", EnvironmentVariableTarget.Process);

// Set PythonPath
// ONLY SET THIS IF YOU ARE SURE WHAT YOU ARE DOING
Environment.SetEnvironmentVariable("PYTHONPATH", @"C:\tools\Anaconda3\Lib", EnvironmentVariableTarget.Process);


//Runtime.PythonDLL = @"C:\\tools\\Anaconda3\\python38.dll";
//PythonEngine.PythonHome = Environment.GetEnvironmentVariable("PYTHONHOME", EnvironmentVariableTarget.Process);

//string pythonDll = @"C:\\tools\\Anaconda3\\python38.dll";
//Environment.SetEnvironmentVariable("PYTHONNET_PYDLL", pythonDll);

Runtime.PythonDLL = @"C:\\tools\\Anaconda3\\python38.dll";
//Runtime.PythonDLL = @"C:\tools\Anaconda3\envs\excel\python311.dll";
PythonEngine.Initialize();
using (Py.GIL())
{
PythonEngine.RunSimpleString(@"print(""helloworld"")");
}
}
}
}

串接 ddddocr

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
using Python.Runtime;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleAppPy
{
internal class Program
{
static void Main(string[] args)
{
ConfigPythonPath();
string result = "";
var text = RunPythonCodeAndReturn(
@"
import ddddocr

ocr = ddddocr.DdddOcr()
with open('C:\captcha_images\img1.png', 'rb') as f:
img_bytes = f.read()
res = ocr.classification(img_bytes)
",
"res");


Console.WriteLine("text:" + text.ToString().ToUpper());
}

static void ConfigPythonPath()
{
// Modify Path
//string path = @"C:\tools\Anaconda3\envs;" + Environment.GetEnvironmentVariable("PATH", EnvironmentVariableTarget.Machine);

// Set Path
//Environment.SetEnvironmentVariable("PATH", path, EnvironmentVariableTarget.Process);

// Set PythonHome
//Environment.SetEnvironmentVariable("PYTHONHOME", @"C:\tools\Anaconda3", EnvironmentVariableTarget.Process);
Environment.SetEnvironmentVariable("PYTHONHOME", @"C:\tools\Anaconda3\envs\excel", EnvironmentVariableTarget.Process);

// Set PythonPath
// ONLY SET THIS IF YOU ARE SURE WHAT YOU ARE DOING
//Environment.SetEnvironmentVariable("PYTHONPATH", @"C:\tools\Anaconda3\Lib", EnvironmentVariableTarget.Process);
Environment.SetEnvironmentVariable("PYTHONPATH", @"C:\tools\Anaconda3\envs\excel\Lib", EnvironmentVariableTarget.Process);


//Runtime.PythonDLL = @"C:\\tools\\Anaconda3\\python38.dll";
//PythonEngine.PythonHome = Environment.GetEnvironmentVariable("PYTHONHOME", EnvironmentVariableTarget.Process);

//string pythonDll = @"C:\\tools\\Anaconda3\\python38.dll";
//Environment.SetEnvironmentVariable("PYTHONNET_PYDLL", pythonDll);

//Runtime.PythonDLL = @"C:\\tools\\Anaconda3\\python38.dll";
Runtime.PythonDLL = @"C:\tools\Anaconda3\envs\excel\python311.dll";


}
public static string RunPythonCodeAndReturn(string pycode, string returnedVariableName) {
string returnedVariable = "";
PythonEngine.Initialize();
using (Py.GIL())
{
using (var scope = Py.CreateScope())
{
scope.Exec(pycode);
returnedVariable=scope.Get<string>(returnedVariableName);
}
}
return returnedVariable;
}


}
}

搭配 Puppeteer ddddocr 破解驗證碼

首先要先把 img 轉為 base64
先用 WaitForSelectorAsync 取得 img 標籤 , 接著用 EvaluateFunctionAsync 把 js 插進去
用 canvas 建立出來的 base64 預設會帶有 image/png;base64, , 所以要把它去除

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
                        var img = await page.WaitForSelectorAsync(@"img");
var jsCode =
@"() => {
var img = document.querySelector('img');
var canvas = document.createElement('canvas');
canvas.width = img.width;
canvas.height = img.height;
var ctx = canvas.getContext('2d');
ctx.drawImage(img, 0, 0);
var base64 = canvas.toDataURL();
var strOnly = canvas.toDataURL('image/png').split(';base64,')[1]
return strOnly;
}";

var b64 = await img.EvaluateFunctionAsync<string>(jsCode);

接著回顧老外的 說明 定義一個這樣的 function

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
public static object RunPythonCodeAndReturn(string pycode, object parameter, string parameterName, string returnedVariableName)
{
object returnedVariable = new object();
PythonEngine.Initialize();
using (Py.GIL())
{
using (var scope = Py.CreateScope())
{
scope.Set(parameterName, parameter.ToPython());
scope.Exec(pycode);
returnedVariable = scope.Get<object>(returnedVariableName);
}
}
return returnedVariable;
}

接著在 c# 宣告 b64 這個變數 , 讓他承接 js 回傳的 base64 字串結果
然後再把 b64 丟入 python 內去辨識就大功告成

1
2
3
4
5
6
7
8
9
10
11
12
13
14
                        var b64 = await img.EvaluateFunctionAsync<string>(jsCode);

Console.WriteLine(b64);
var deText = RunPythonCodeAndReturn(
@"
import ddddocr
import base64
imgdata = base64.b64decode(b64)
ocr = ddddocr.DdddOcr()
res = ocr.classification(imgdata)
print(res)

",
b64, "b64", "res");

程式碼大概長這樣

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
        static async Task<string> Crack(string username, string pwd)
{
string result = "";
try
{
//await new BrowserFetcher( ).DownloadAsync( BrowserFetcher.DefaultRevision );
using (var browser = await Puppeteer.LaunchAsync(new LaunchOptions()
{
Headless = false,
ExecutablePath = @"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
}))
{
using (var page = await browser.NewPageAsync())
{
await page.GoToAsync("http://yourip/index.php");
await page.WaitForSelectorAsync(@"input[name='password']");
await page.FocusAsync("input[name='password']");
await page.Keyboard.TypeAsync(pwd);
var img = await page.WaitForSelectorAsync(@"img");
var jsCode =
@"() => {
var img = document.querySelector('img');
var canvas = document.createElement('canvas');
canvas.width = img.width;
canvas.height = img.height;
var ctx = canvas.getContext('2d');
ctx.drawImage(img, 0, 0);
var base64 = canvas.toDataURL();
var strOnly = canvas.toDataURL('image/png').split(';base64,')[1]
return strOnly;
}";

var b64 = await img.EvaluateFunctionAsync<string>(jsCode);

Console.WriteLine(b64);
var deText = RunPythonCodeAndReturn(
@"
import ddddocr
import base64
imgdata = base64.b64decode(b64)
ocr = ddddocr.DdddOcr()
res = ocr.classification(imgdata)
print(res)

",
b64, "b64", "res");

await page.FocusAsync("input[name='authcode']");
await page.Keyboard.TypeAsync(deText.ToString());
await page.ClickAsync("input[name='submit']");
}
}

return await Task.FromResult(result);
}
catch (Exception ex)
{
throw;
}
}

破解三民書局驗證碼

特別注意這裡要安裝 7.0 的 PuppeteerSharp 後續的版本不曉得為啥 headless 會掛掉 , 暫時沒研究
大致上重點如下

找出相對應的 html 標籤

1
2
3
4
5
#Account
#pwd
#HumanPass
button[type="submit"]
#CaptchaImg

接著發現他有個 ReloadCaptchaImg 方法 , 呼叫後會得到這樣的網址 https://www.sanmin.com.tw/other/captcha/27
可以多打幾個 request , 先自己測試辨識效果看看

1
2
3
4
ReloadCaptchaImg() {
var a = Math.floor(Math.random() * (100 - 0));
$('#CaptchaImg').attr('src', '/other/captcha/' + a);
}

然後發現他有個雷 , 他的 img size 實際上是 90 * 35 , 可是 html tag 上面是 70 * 34
所以如果插 js 直接用他的長寬下去會發現圖片被裁切掉一個字

1
2
3
4
5
6
7
8
9
var img = document.querySelector('#CaptchaImg');
var canvas = document.createElement('canvas');
canvas.width = 90;
canvas.height = 35;
var ctx = canvas.getContext('2d');
ctx.drawImage(img, 0, 0);
var base64 = canvas.toDataURL();
var strOnly = canvas.toDataURL('image/png').split(';base64,')[1]
return strOnly;

另外因為 python 有縮排要求 , 所以如果你 format c# 的 code 可能會連同 python 的 code 一起被縮排然後噴得莫名其妙 , 最後程式碼如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

static async Task<string> GetSanMin(string username, string pwd)
{
string result = "";
try
{
using (var browser = await Puppeteer.LaunchAsync(new LaunchOptions()
{
Headless = false,
ExecutablePath = @"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
}))
{
var page = await browser.NewPageAsync();
await page.GoToAsync("https://www.sanmin.com.tw/member/login/?ReturnUrl=%2fmember%2findex");


await page.WaitForSelectorAsync("#Account");
await page.FocusAsync("#Account");
await page.Keyboard.TypeAsync(username);
Thread.Sleep(3000);

await page.WaitForSelectorAsync("#pwd");
await page.FocusAsync("#pwd");
await page.Keyboard.TypeAsync(pwd);
Thread.Sleep(3000);


var img = await page.WaitForSelectorAsync(@"#CaptchaImg");
var jsCode =
@"() => {
var img = document.querySelector('#CaptchaImg');
var canvas = document.createElement('canvas');
canvas.width = 90;
canvas.height = 35;
var ctx = canvas.getContext('2d');
ctx.drawImage(img, 0, 0);
var base64 = canvas.toDataURL();
var strOnly = canvas.toDataURL('image/png').split(';base64,')[1]
return strOnly;
}";

var b64 = await img.EvaluateFunctionAsync<string>(jsCode);

Console.WriteLine(b64);
var deText = RunPythonCodeAndReturn(
@"
import ddddocr
import base64
imgdata = base64.b64decode(b64)
ocr = ddddocr.DdddOcr()
res = ocr.classification(imgdata)
print(res)

",
b64, "b64", "res");

await page.FocusAsync("#HumanPass");
await page.Keyboard.TypeAsync(deText.ToString());


Thread.Sleep(3000);
await page.ClickAsync("button[type='submit']");

Thread.Sleep(3000);
}

return await Task.FromResult(result);
}
catch (Exception ex)
{
throw;
}
}
關閉