datatables performance

datatables performance

blue80blue80 Posts: 8Questions: 4Answers: 0

my mongodb database has over 5 million rows. the data column is big text content (many of them over 10m) with text index (db.edi_history.ensureIndex( { data: "text" }, {name: "edi_history_data"} );). When I just loaded 400000 rows, the datatables performance was ok. I can query any column, even the data column and got returned rows pretty quick. Now I loaded all the 5 million rows. I can not query the data column anymore. I tested a search, it ran overnight already, still processing. Can anyone help? By the way, when I query the data column from mongodb directly such as db.edi_history.find( { $text: { $search: "3808828" } } );, it's very quick to return results.
The server_side code is:
mb_internal_encoding('UTF-8');
$database = 'edi';
$collection = 'edi_history';
try {
$m = new MongoClient();
} catch (MongoConnectionException $e) {
die('Error connecting to MongoDB server');
}
$m_collection = $m->$database->$collection;
$fields = array();
$input =& $_GET;
$iColumns=$input['iColumns'];

$dataProps = array();
for ($i = 0; $i < $iColumns; $i++) {
$var = 'mDataProp_'.$i;
if (!empty($input[$var]) && $input[$var] != 'null') {
$dataProps[$i] = $input[$var];
}
}
$searchTermsAny = array();
$searchTermsAll = array();
if ( !empty($input['sSearch']) ) {
$sSearch = $input['sSearch'];
for ( $i=0 ; $i < $iColumns ; $i++ ) {
if ($input['bSearchable_'.$i] == 'true') {
if ($input['bRegex'] == 'true') {
$sRegex = str_replace('/', '\/', $sSearch);
} else {
$sRegex = preg_quote($sSearch, '/');
}
$searchTermsAny[] = array(
$dataProps[$i] => new MongoRegex( '/'.$sRegex.'/i' )
); } } }
for ( $i=0 ; $i < $iColumns ; $i++ ) {
if ( $input['bSearchable_'.$i] == 'true' && $input['sSearch_'.$i] != '' ) {
if ($input['bRegex_'.$i] == 'true') {
$sRegex = str_replace('/', '\/', $input['sSearch_'.$i]);
//$sRegex = str_replace('\n', '\r', $input['sSearch_'.$i]);
} else {
$sRegex = preg_quote($input['sSearch_'.$i], '/');
}
$searchTermsAll[ $dataProps[$i] ] = new MongoRegex( '/'.$sRegex.'/i' );
}
}
$searchTerms = $searchTermsAll;
if (!empty($searchTermsAny)) {
$searchTerms['$or'] = $searchTermsAny;
}
$cursor = $m_collection->find($searchTerms, $fields)->limit(5);

if ( isset( $input['iDisplayStart'] ) && $input['iDisplayLength'] != '-1' ) {
$cursor->limit( $input['iDisplayLength'] )->skip( $input['iDisplayStart'] );
}

if ( isset($input['iSortCol_0']) ) {
$sort_fields = array();
for ( $i=0 ; $i<intval( $input['iSortingCols'] ) ; $i++ ) {
if ( $input[ 'bSortable_'.intval($input['iSortCol_'.$i]) ] == 'true' ) {
$field = $dataProps[ intval( $input['iSortCol_'.$i] ) ];
$order = ( $input['sSortDir_'.$i]=='desc' ? -1 : 1 );
$sort_fields[$field] = $order;
}
}
$cursor->sort($sort_fields);
}
$output = array(
"sEcho" => intval($input['sEcho']),
"iTotalRecords" => $m_collection->count(),
"iTotalDisplayRecords" => $cursor->count(),
"aaData" => array()
);
foreach ( $cursor as $doc ) {
$output['aaData'][] = $doc;
}
echo json_encode( $output );

html file is:
$(document).ready(function() {
// Setup - add a text input to each footer cell
$('#edi_view tfoot th').each( function () {
var title = $('#edi_view thead th').eq( $(this).index() ).text();
$(this).html( '<input type="text" placeholder="Search '+title+'" />' );
} );

var dt =$('#edi_view').DataTable( {
sDom: 'T<"clear">lfrtip',
"oTableTools": {
"sSwfPath": "../../extensions/TableTools-2.2.2/swf/copy_csv_xls_pdf.swf" ,
"aButtons": [ "copy",
{"sExtends": "csv","oSelectorOpts": { filter: 'applied', order: 'current' },"sFileName": "EdiList.txt"},
{"sExtends": "xls","sFileName": "EdiList.xls"},
{
"sExtends": "pdf",
"sPdfOrientation": "landscape",
"mColumns": "hidden",
"sFieldBoundary": "",
"sFieldSeperator": "~",
"fnClick": function( nButton, oConfig, flash )
{ this.fnSetText( flash,
"title:EDI History Export\n"+
"colWidth:"+ this.fnCalcColRatios(oConfig) +"\n"+
"orientation:"+ oConfig.sPdfOrientation +"\n"+
"size:"+ oConfig.sPdfSize +"\n"+
"--/TableToolsOpts--\n" +
this.fnGetTableData(oConfig));
}
}]
},
"processing": true,
"serverSide": true,
"sAjaxSource": "scripts/server_processing_m.php",
"deferRender": true,
"fnServerData": function ( sSource, aoData, fnCallback, oSettings ) {
oSettings.jqXHR = $.ajax( {
"dataType": 'json',
"type": "GET",
"url": sSource,
"data": aoData,
"cache": true,
"timeout": 300000,
"success": fnCallback } );
},
"aoColumns": [
{
"class": "details-control",
"bSortable": false,
"bSearchable": false,
"data": null,
"sDefaultContent": ""
},
{"bSearchable": true, "mDataProp": "file_id" },
{ "bSearchable": true,"mDataProp": "company" },
{ "bSearchable": true,"mDataProp": "processing_code" },
{ "bSearchable": true,"mDataProp": "trading_partner" },
{ "bSearchable": true,"mDataProp": "receive_date" },
{"bSearchable": true, "mDataProp": "functional_group" },
{ "bSearchable": true,"mDataProp": "transaction_type" },
{ "bSearchable": true,"mDataProp": "control_number" },
{"bVisible": false, "bSearchable": true, "mDataProp": "data" }
],
"order": [[1, 'asc']]
} );
var detailRows = [];

    $('#edi_view tbody').on( 'click', 'tr td:first-child', function () {
            var tr = $(this).closest('tr');
            var row = dt.row( tr );
            var idx = $.inArray( tr.attr('id'), detailRows );

            if ( row.child.isShown() ) {
                    tr.removeClass( 'details' );
                    row.child.hide();

                    // Remove from the 'open' array
                    detailRows.splice( idx, 1 );
            }
            else {
                    tr.addClass( 'details' );
            row.child( format( row.data() ) ).show();

                    if ( idx === -1 ) {
                            detailRows.push( tr.attr('id') );
                    }
            }
    } );

    dt.on( 'draw', function () { 
            $.each( detailRows, function ( i, id ) {
                    $('#'+id+' td:first-child').trigger( 'click' );
            } );

    dt.columns().eq( 0 ).each( function ( colIdx ) {
            $( 'input', dt.column( colIdx ).footer() ).on( 'keyup change', function () {
                   dt 
                            .column( colIdx )
                            .search( this.value )
                            .draw();
            } );

});
});

Answers

  • allanallan Posts: 63,725Questions: 1Answers: 10,506 Site admin

    If you are using server-side processing, then the performance is primarily down to the server and querying the data. How long does the Ajax request take to be answered? You might need to make some modifications to the server-side script to optimise it.

    Allan

  • blue80blue80 Posts: 8Questions: 4Answers: 0

    Thank you very much for the quick reply. I am an Oracle dba and quick new for open source. Let me try to answer your question. If I have less rows (less than 1m), any query comes back in reasonable time (from 1 to 2 digit seconds). I downloaded the server_side code for mongodb from this side. I agree I should tune it, but really don't know how.
    As the data column is too big, I put it as not visible, but searchable. So when I put any search string to search for data column, it seems it tries to search all columns ($searchTermsAny?).
    Would you please give any idea how to tune it?
    Thanks

  • blue80blue80 Posts: 8Questions: 4Answers: 0

    Hi, Allan:
    In order to avoid searching in all fields when I only want to search in data field, I changed the detailed field to searchable as
    "aoColumns": [
    {
    "class": "details-control",
    "bSortable": false,
    "sWidth": "15%",
    "bSearchable": true,
    "bRegex": true,
    "data": null,
    "sDefaultContent": ""
    },
    (the detail field only show the data field) so I can do the individual column search only on data column. But the search result still never comes back (processing).
    I also read through the server_side php. To me, this code is very briefly, neatly and precisely written. should be very efficient.
    Is there anything you can help? Or datatables cannot just handle such situation (over 5millions rows with text column which is up to 16m each).
    Actually, when I loaded my data, I excluded those records whose text field is greater than 16m because mongodb standard collection's limit. I have to load those to GridFS. That's another challenge. I don't know it datatables can handle GridFS
    Thanks in advance for any help.

  • allanallan Posts: 63,725Questions: 1Answers: 10,506 Site admin

    $searchTermsAny - I'm afraid I have no idea what that variable is. But it sounds like you want to update the script to consider the columns[i][searchable] parameter that is sent to the server for each column in the table - see: http://datatables.net/manual/server-side#Sent-parameters .

    Or datatables cannot just handle such situation (over 5millions rows with text column which is up to 16m each).

    I've seen DataTables being used with 20 million+ rows of data. As I noted, it is really down to the server-side implementation when you are using server-side processing. In that mode DataTables itself is just an events and display controller.

    Allan

This discussion has been closed.