脚本如下:
from bs4 import BeautifulSoup
def list_get(file):
    """Collect the ``href`` values of all ``<a class="link">`` tags in a file.

    Args:
        file: Path of the HTML file to parse.

    Returns:
        A list holding the result of ``tag.get('href')`` for every anchor
        tag whose class is ``link``.
    """
    # The context manager closes the handle as soon as parsing is done;
    # the original left it open until garbage collection.
    with open(file) as html:
        soup = BeautifulSoup(html)
    # Named ``anchor`` / returned directly so the builtin ``list`` is not
    # shadowed inside this function.
    return [anchor.get('href') for anchor in soup.find_all('a', class_='link')]
if __name__ == "__main__":
    # Script entry point: parse the sample page. The returned list is not
    # used here.
    list_get('List.htm')
list_get函数返回的是一个由字符串组成的list对象。
其C语言调用代码如下:
#include <Python.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Forward declaration: main() calls this before its definition appears. */
char* GDALPythonObjectToCStr(PyObject* pyObject);
int main(int argc, char *argv[])
{
Py_Initialize();
if(!Py_IsInitialized())
{
return -1;
}
PyRun_SimpleString("import sys");
PyRun_SimpleString("sys.path.append('./script')");
PyObject* pModule;
PyObject* pDict;
PyObject* pFunc;
pModule = PyImport_ImportModule("list");
if(!pModule)
{
printf("can't find list.py");
system("PAUSE");
getchar();
return -1;
}
pDict = PyModule_GetDict(pModule);
if(!pDict)
{
return -1;
}
pFunc = PyDict_GetItemString(pDict,"list_get");
if(!pFunc || !PyCallable_Check(pFunc))
{
printf("can't find function [list_get]");
getchar();
return -1;
}
PyObject* args = PyTuple_New(1);
PyTuple_SetItem(args,0,Py_BuildValue("s","List.htm"));
PyObject* value = PyObject_CallObject(pFunc,args);
int ret = PySequence_Check(value);
printf("check:%d\n",ret);
int length = PySequence_Size(value);
printf("length:%d\n",length);
int i = 0;
for(;i<length;i++)
{
PyObject* obj = PySequence_GetItem(value,i);
//char* str = PyBytes_AS_STRING(obj);
char* str = GDALPythonObjectToCStr(obj);
printf("link:%s\n",str);
free(str);
}
Py_DECREF(args);
Py_DECREF(pModule);
Py_Finalize();
system("PAUSE");
return 0;
}
/* Return a NULL terminated c String from a PyObject */
/* Result must be freed with GDALPythonFreeCStr */
char* GDALPythonObjectToCStr(PyObject* pyObject)
{
#if PY_VERSION_HEX >= 0x03000000
if(PyUnicode_Check(pyObject))
{
char *pszStr;
char *pszNewStr;
Py_ssize_t nLen;
PyObject* pyUTF8Str = PyUnicode_AsUTF8String(pyObject);
PyBytes_AsStringAndSize(pyUTF8Str,&pszStr,&nLen);
pszNewStr = (char*)malloc(nLen+1);
memcpy(pszNewStr,pszStr,nLen+1);
Py_XDECREF(pyUTF8Str);
return pszNewStr;
}
else if(PyBytes_Check(pyObject))
{
char *pszStr;
char *pszNewStr;
Py_ssize_t nLen;
PyBytes_AsStringAndSize(pyObject,&pszStr,&nLen);
pszNewStr = (char*)malloc(nLen+1);
memcpy(pszNewStr,pszStr,nLen+1);
return pszNewStr;
}
else
{
char *pszStr = (char*)malloc(1);
pszStr[0] = '\0';
return pszStr;
}
#else
return PyString_AsString(pyObject);
#endif
}